# [Pastebin page chrome, not part of the script]
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Packages used by the scraper.
library(httr)
library(rjson)
library(XML)
library(stringr)
library(curl)

# SECURITY NOTE(review): ssl_verifypeer = 0L switches off TLS certificate
# verification for every httr request made in this session, including the
# login POST that carries the account password. It is kept so existing runs
# keep working; remove it once the site's certificate chain validates.
httr::set_config(config(ssl_verifypeer = 0L))

# Browser-like User-Agent string for R's HTTP option.
# NOTE(review): confirm this option actually reaches the httr requests below.
options(HTTPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0")
# Define input
#
# Track lookup table: abb_race_list[[1]] holds the track codes and
# abb_race_list[[2]] the full track names; the two lists are aligned by
# position (entry k of one belongs to entry k of the other).
abb_race_list <- list(
  list(
    "AIK", "AJX", "ABT", "ALB", "ANF", "AQU", "ARP", "AP", "ASD", "ATH",
    "ATL", "ATO", "BSR", "BM", "BMF", "BEL", "BTP", "BHP", "BEU", "BRD",
    "BRO", "BCF", "CRC", "CAM", "CBY", "CAS", "CWF", "CHA", "CHL", "CPW",
    "CD", "CNL", "NS2", "CLS", "DAY", "DMR", "NS9", "DEL", "DED", "DEP",
    "DET", "DXD", "DUQ", "UN", "ELK", "ELP", "EMD", "EMT", "ED", "EUR",
    "EVD", "FG", "FAI", "FMT", "FPL", "FAX", "FP", "FPX", "FH", "FER",
    "FL", "FON", "FE", "FTP", "FX", "FNO", "GV", "GIL", "GLN", "GG",
    "SAF", "GN", "GPR", "GRP", "GBF", "GF", "GLD", "GRM", "N11", "GRA",
    "DUN", "GP", "GPW", "BRN", "HST", "HAW", "HP", "HIA", "CT", "HOL",
    "HOO", "HPO", "HCF", "IND", "ING", "JRM", "KSP", "KAM", "KEE", "KD",
    "KIN", "LAM", "LBT", "LRL", "BOI", "LBG", "LEX", "LNN", "LEV", "LS",
    "LA", "LRC", "LAD", "MVR", "MAL", "MAN", "MAF", "MAR", "MD", "MED",
    "MDA", "MID", "MC", "MS", "MIL", "MON", "NS4", "MTH", "MTP", "MOR",
    "MNR", "MPM", "S11", "FAR", "NP", "NVD", "OKR", "OTH", "OTP", "OSA",
    "OP", "OTC", "ONE", "PMB", "PRX", "NS6", "PEN", "NS5", "UNI", "PW",
    "PHA", "PIC", "PIM", "PMT", "PNL", "PLN", "POD", "PM", "PRM", "PID",
    "RB", "RDM", "RP", "RET", "RIL", "RD", "RKM", "RPD", "RUI", "RUP",
    "SAC", "HOU", "SJD", "SLR", "SAN", "SDY", "SA", "SON", "SFE", "SR",
    "SAR", "NS8", "SEO", "SHW", "SHD", "SOL", "SOP", "SPT", "STP", "STK",
    "STN", "SH", "SUF", "N12", "SUD", "SND", "SUN", "SRP", "SWF", "TAM",
    "WDS", "TDN", "TIL", "TIM", "TGD", "TRY", "TUP", "TP", "WTS", "WW",
    "WBR", "WMF", "WPR", "ELY", "WRD", "WIL", "WNT", "WO", "WYO", "YM",
    "YAV", "YD", "ZIA"
  ),
  list(
    "AIKEN", "AJAX DOWNS", "ALBERTA DOWNS", "ALBUQUERQUE", "ANTHONY DOWNS",
    "AQUEDUCT", "ARAPAHOE PARK", "ARLINGTON", "ASSINIBOIA DOWNS", "ATLANTA",
    "ATLANTIC CITY", "ATOKAD DOWNS", "BARRETTS RACE MEET AT FAIRPLEX", "BAY MEADOWS", "BAY MEADOWS FAIR",
    "BELMONT PARK", "BELTERRA PARK", "BETFAIR HOLLYWOOD PARK", "BEULAH PARK", "BLUE RIBBON DOWNS",
    "BROOKHILL FARM", "BROWN COUNTY FAIR", "CALDER RACE COURSE", "CAMDEN", "CANTERBURY PARK",
    "CASSIA COUNTY FAIR", "CENTRAL WYOMING FAIR", "CHARLESTON", "CHARLOTTE", "CHIPPEWA DOWNS",
    "CHURCHILL DOWNS", "COLONIAL DOWNS", "COLONIAL NSA", "COLUMBUS", "DAYTON",
    "DEL MAR", "DELAWARE NSA", "DELAWARE PARK", "DELTA DOWNS", "DESERT PARK",
    # "DIXIE DOWNS" was split across two lines inside the literal; rejoined.
    "DETROIT RACE COURSE", "DIXIE DOWNS", "DU QUOIN", "EASTERN OREGON LIVESTOCK SHOW", "ELKO COUNTY FAIR",
    "ELLIS PARK", "EMERALD DOWNS", "EMMETT", "ENERGY DOWNS", "EUREKA",
    "EVANGELINE DOWNS", "FAIR GROUNDS", "FAIR HILL", "FAIR MEADOWS", "FAIR PLAY PARK",
    "FAIRFAX", "FAIRMOUNT PARK", "FAIRPLEX PARK", "FAR HILLS", "FERNDALE",
    "FINGER LAKES", "FONNER PARK", "FORT ERIE", "FORT PIERRE", "FOXFIELD",
    "FRESNO", "GENESEE VALLEY", "GILLESPIE COUNTY FAIRGROUND", "GLYNDON", "GOLDEN GATE FIELDS",
    "GRAHAM COUNTY FAIR @ SAFFORD", "GRAND NATIONAL", "GRANDE PRAIRIE", "GRANTS PASS", "GREAT BARRINGTON FAIR",
    "GREAT FALLS", "GREAT LAKES DOWNS", "GREAT MEADOW", "GREAT MEADOW NSA", "GREEN ACRES",
    "GREENELEE COUNTY FAIR @ DUNCAN", "GULFSTREAM PARK", "GULFSTREAM PARK WEST", "HARNEY COUNTY FAIR", "HASTINGS RACECOURSE",
    "HAWTHORNE", "HAZEL PARK", "HIALEAH PARK", "HOLLYWOOD CASINO AT CHARLES TOWN RACES", "HOLLYWOOD PARK",
    "HOOSIER PARK", "HORSEMEN'S PARK", "HUMBOLDT COUNTY FAIR", "INDIANA GRAND RACE COURSE", "INGLESIDE",
    "JEROME COUNTY FAIR", "KALISPELL", "KAMLOOPS", "KEENELAND", "KENTUCKY DOWNS",
    "KIN PARK", "LA MESA PARK", "LAUREL BROWN RACETRACK", "LAUREL PARK", "LES BOIS PARK",
    "LETHBRIDGE", "LEXINGTON", "LINCOLN RACE COURSE", "LITTLE EVERGLADES", "LONE STAR PARK",
    "LOS ALAMITOS", "LOS ALAMITOS RACE COURSE", "LOUISIANA DOWNS", "MAHONING VALLEY RACE COURSE", "MALVERN",
    "MANOR DOWNS", "MARIAS FAIR", "MARLBORO", "MARQUIS DOWNS", "MEADOWLANDS",
    "MELVILLE DISTRICT AGRIPAR", "MIDDLEBURG", "MILES CITY", "MILL SPRING", "MILLARVILLE",
    "MONKTON", "MONMOUTH NSA", "MONMOUTH PARK", "MONTPELIER", "MORVEN PARK",
    # "MT. PLEASANT MEADOWS" was split across two lines inside the literal; rejoined.
    "MOUNTAINEER CASINO RACETRACK & RESORT", "MT. PLEASANT MEADOWS", "Monterrico Peru", "NORTH DAKOTA HORSE PARK", "NORTHLANDS PARK",
    "NORTHVILLE DOWNS", "OAK RIDGE", "OAK TREE AT HOLLYWOOD PARK", "OAK TREE AT PLEASANTON", "OAK TREE AT SANTA ANITA",
    "OAKLAWN PARK", "OCALA TRAINING CENTER", "ONEIDA COUNTY FAIR", "PALM BEACH POLO CLUB", "PARX RACING",
    "PARX RACING NSA", "PENN NATIONAL", "PENN NATIONAL NSA", "PENNSYLVANIA HUNT CUP", "PERCY WARNER",
    "PHILADELPHA PARK", "PICOV DOWNS", "PIMLICO", "PINE MTN-CALLAWAY GARDEN", "PINNACLE RACE COURSE",
    "PLEASANTON", "POCATELLO DOWNS", "PORTLAND MEADOWS", "PRAIRIE MEADOWS", "PRESQUE ISLE DOWNS",
    "RED BANK", "RED MILE", "REMINGTON PARK", "RETAMA PARK", "RILLITO",
    "RIVER DOWNS", "ROCKINGHAM PARK", "ROSSBURN PARKLAND DOWNS", "RUIDOSO DOWNS", "RUPERT DOWNS",
    "SACRAMENTO", "SAM HOUSTON RACE PARK", "SAN JUAN DOWNS", "SAN LUIS REY TRAINING CENTER", "SANDOWN PARK",
    "SANDY DOWNS", "SANTA ANITA PARK", "SANTA CRUZ COUNTY FAIR @ SONOITA", "SANTA FE", "SANTA ROSA",
    "SARATOGA", "SARATOGA NSA", "SEARCH ENGINE OPTIMIZATION", "SHAWAN DOWNS", "SHENNANDOAH DOWNS",
    "SOLANO", "SOUTHERN PINES", "SPORTSMAN'S PARK", "STAMPEDE PARK", "STOCKTON",
    "STONEYBROOK AT FIVE POINTS", "STRAWBERRY HILL", "SUFFOLK DOWNS", "SUFFOLK NSA", "SUN DOWNS",
    "SUNFLOWER", "SUNLAND PARK", "SUNRAY PARK", "SWEETWATER DOWNS", "TAMPA BAY DOWNS",
    "THE WOODLANDS", "THISTLEDOWN", "TILLAMOOK COUNTY FAIR", "TIMONIUM", "TIOGA DOWNS",
    "TRYON", "TURF PARADISE", "TURFWAY PARK", "WAITSBURG RACE TRACK", "WALLA WALLA",
    "WEBER DOWNS", "WESTERN MT FAIR", "WHITE PINE RACEWAY", "WHITE PINE RACING", "WILL ROGERS DOWNS",
    "WILLOWDALE STEEPLECHASE", "WINTERTHUR", "WOODBINE", "WYOMING DOWNS", "YAKIMA MEADOWS",
    "YAVAPAI DOWNS", "YELLOWSTONE DOWNS", "ZIA PARK"
  )
)

# Season year; also the initial value of the year tracking used while writing
# past-performance records.
year <- "2017"
# Race date as used in the site's URLs (month-day-year).
race_date <- "5-20-2017"
# Pattern matched against the track names listed on the site.
track_name <- "Evangeline"
# Log in to timeformus.com.
#
# Fetches the login page to read the anti-forgery token from the login form,
# then posts the credentials as a form. httr keeps the cookies it receives for
# later requests to the same host, so callers do not need to pass them around.
#
# Args:
#   username, password: account credentials. They default to the
#     TIMEFORM_USERNAME / TIMEFORM_PASSWORD environment variables and fall back
#     to the values that used to be hard-coded here.
#     SECURITY NOTE(review): the literal fallbacks keep existing runs working
#     but leave a password in source control; rotate it and drop the fallbacks.
#
# Returns (invisibly) the cookie table of the login response.
# Stops with an error when a request fails or the token cannot be found.
login <- function(username = Sys.getenv("TIMEFORM_USERNAME", unset = "mutuelinvest"),
                  password = Sys.getenv("TIMEFORM_PASSWORD", unset = "mi1Timeform")) {
  # Grab CSRF token
  login_page <- GET("https://timeformus.com/login?ReturnUrl=/")
  stop_for_status(login_page)
  doc <- htmlParse(content(login_page, "text"))
  token <- xpathSApply(
    doc, "/html/body/div/div/div[2]/div/form/input[1]", xmlGetAttr, "value"
  )
  if (length(token) == 0) {
    stop("Login page did not contain a request verification token", call. = FALSE)
  }

  # Login
  r <- POST(
    "https://timeformus.com/Login",
    body = list(
      "__RequestVerificationToken" = token[[1]],
      returnUrl = "/",
      UserName = username,
      Password = password
    ),
    encode = "form"
  )
  stop_for_status(r)

  invisible(cookies(r))
}
# Make sure the track for a race date is owned, buying it when necessary.
#
# Loads the "buy tracks" page for `race_date`, looks for tracks whose name
# matches `name`, and for each match whose status cell contains "Add" posts the
# add-to-cart and checkout requests. Matches that do not show "Add" are
# reported as already owned.
#
# Args:
#   race_date: race date in the form used by the site's URLs (e.g. "5-20-2017").
#   name: regular expression matched against the track names on the page.
#   pay_with: value posted as `payWith` at checkout; defaults to the id that
#     used to be hard-coded here.
#
# Returns NULL invisibly; called for its side effects.
purchase_track <- function(race_date, name,
                           pay_with = "537b15b2-15af-4450-abdd-4b15d0b6fa9d") {
  # Check track and purchase
  url <- paste0("https://timeformus.com/buytracks/", race_date)
  print(url)
  r <- GET(url)
  doc <- htmlParse(content(r, "text"))

  track_name <- xpathSApply(doc, "//form/div[@class=\"race clearfix\"]/div/strong", xmlValue)
  track_id <- xpathSApply(doc, "//form/div[@class=\"race clearfix\"]/input[1]", xmlGetAttr, "value")
  track_status <- xpathSApply(doc, "//form/div[@class=\"race clearfix\"]/div[3]", xmlValue)

  # seq_along() keeps the loop from running at all when the page lists no
  # tracks (1:length() would iterate over c(1, 0) and fail).
  for (i in seq_along(track_status)) {
    # isTRUE() also skips rows for which no name was extracted.
    if (!isTRUE(regexpr(name, track_name[i]) != -1)) {
      next
    }
    if (regexpr("Add", track_status[i]) != -1) {
      print("Purchase")
      # Purchase
      POST(
        "https://timeformus.com/BuyTracks/Add",
        body = list(trackRaceDateId = track_id[i], raceDate = race_date),
        encode = "form"
      )
      POST(
        "https://timeformus.com/buypps/checkout",
        body = list(payWith = pay_with),
        encode = "form"
      )
    } else {
      print('Already own')
    }
  }
  invisible(NULL)
}
# Trim leading/trailing whitespace and wrap the value in double quotes.
#
# `raw` is coerced to character; when it has several elements they are
# concatenated inside a single pair of quotes. Double quotes already present
# in the value are left as they are (they are not escaped).
#
# Returns a single character string.
clear_text <- function(raw) {
  trimmed <- gsub("^\\s+|\\s+$", "", raw)
  paste0(c('"', trimmed, '"'), collapse = "")
}
# Open output file
fileConn <- file("output.csv", "w")

# Login
login()

# Check / purchase track
purchase_track(race_date, track_name)

# Get race date page
r <- GET(paste0("https://timeformus.com/account/purchases/", race_date))
temp <- content(r, "text")
doc <- htmlParse(temp)

# Grab track link: the last purchased track whose name matches `track_name`.
link <- xpathSApply(doc, "//div[@class=\"race clearfix\"]/div[3]/a", xmlGetAttr, "href")
name <- xpathSApply(doc, "//div[@class=\"race clearfix\"]/div[1]/strong", xmlValue)
href <- ""
for (i in seq_along(name)) {
  if (isTRUE(regexpr(track_name, name[i]) != -1)) {
    href <- link[i]
    print("Found link")
  }
}
# Without a link the request below would fetch the site root and every later
# XPath lookup would work on the wrong page, so fail here with a clear message.
if (identical(href, "")) {
  close(fileConn)
  stop(
    "No purchased track matching '", track_name, "' found for ", race_date,
    call. = FALSE
  )
}

# Go to track link
url <- paste0("https://timeformus.com", href)
print(url)
r <- GET(url)
temp <- content(r, "text")
doc <- htmlParse(temp)

# Start logging results
# 1. Header
record_type <- '"H"'
track_name <- clear_text(
  xpathSApply(doc, "//div[@class=\"track-options\"]/select/option[@selected=\"selected\"]", xmlValue)[[1]]
)
track_code <- track_name

# Match the track's full name to its code. Several table entries can be
# contained in one track name (e.g. "LOS ALAMITOS" and "LOS ALAMITOS RACE
# COURSE"), so the longest contained entry wins. The comparison always uses
# the scraped name, never the code that has already been picked.
track_upper <- toupper(track_name)
best_len <- 0L
for (count in seq_along(abb_race_list[[2]])) {
  full_name <- toupper(abb_race_list[[2]][[count]])
  if (nchar(full_name) > best_len && grepl(full_name, track_upper, fixed = TRUE)) {
    print(abb_race_list[[2]][[count]])
    track_code <- clear_text(abb_race_list[[1]][[count]])
    best_len <- nchar(full_name)
  }
}
# Clean up in case no code matched
track_code <- gsub("\r\n", "", track_code)
track_code <- gsub(" {2,20}", "", track_code)

race_date <- xpathSApply(doc, "//div[@class=\"track-date\"]/select/option[@selected=\"selected\"]", xmlValue)[[1]]
race_day <- as.Date(strptime(race_date, format = "%A, %b %d"))
if (!is.na(race_day)) {
  # The format carries no year, so strptime() fills in the current one; pin
  # the date to the configured season year instead.
  race_day <- as.Date(paste0(year, format(race_day, "-%m-%d")), format = "%Y-%m-%d")
}
race_date <- clear_text(race_day)

no_race <- clear_text(length(xpathSApply(doc, "//div[@class=\"race-options\"]/select/option", xmlValue)))
writeLines(paste(c(record_type, track_name, track_code, race_date, no_race), collapse = ","), fileConn)
# ---- Helpers for the record loop ------------------------------------------

# Text content of the nodes selected by the XPath formed by pasting `...`.
node_text <- function(node_doc, ...) {
  xpathSApply(node_doc, paste0(...), xmlValue)
}

# Attribute `attr` of the first node selected by `xpath`.
first_attr <- function(node_doc, xpath, attr) {
  xpathSApply(node_doc, xpath, xmlGetAttr, attr)[1]
}

# Text of `suffix` below the `col`-th "no-wrap past-rating" cell of a row.
rating_cell <- function(node_doc, row_xpath, col, suffix) {
  node_text(node_doc, row_xpath, "/td[@class=\"no-wrap past-rating\"][", col, "]", suffix)
}

# Right-most start position of any known note marker in `text` (-1 if none).
marker_pos <- function(text) {
  max(
    regexpr("Previously trained", text),
    regexpr("Reported gelding", text),
    regexpr("Claimed from", text),
    regexpr("Vet Scratch", text)
  )
}

# ---- Record loop -----------------------------------------------------------
# Writes, for every race of the card: one race record ("R"), its wager records
# ("E"), and per starter a starter record ("S"), past-performance records ("P",
# interleaved with "I" information/year records) and workout records ("W").
# tryCatch(finally = ) closes the output file even when a request or a lookup
# fails part-way through.
invisible(tryCatch({
  # Loop through all race numbers
  race_links <- xpathSApply(doc, "//div[@class=\"race-options\"]/select/option", xmlGetAttr, "value")
  for (href in race_links) {
    url <- paste0("https://timeformus.com", href)
    r <- GET(url)
    temp <- content(r, "text")
    doc <- htmlParse(temp)
    print(url)

    # 3. Race record
    record_type <- '"R"'
    race_info <- clear_text(
      node_text(doc, "//div[@class=\"race-options\"]/select/option[@selected=\"selected\"]")[1]
    )
    race_info_parts <- strsplit(race_info, "[,]")[[1]]
    race_type <- clear_text(node_text(doc, "//div[@class=\"content-info diagram-info\"]/p[2]")[1])
    # Not part of the race record; the starter loop overwrites it before use.
    claiming_price <- clear_text(race_info_parts[3])
    # race_date: same as in the header record
    race_no <- clear_text(strsplit(race_info_parts[1], "[ ]")[[1]][2])
    surface <- clear_text(race_info_parts[5])
    distance <- clear_text(race_info_parts[4])
    purse <- clear_text(node_text(doc, "//div[@class=\"content-info diagram-info\"]/p[4]")[1])
    race_text <- clear_text(node_text(doc, "//div[@class=\"content-info diagram-info\"]/p[5]")[1])
    # The first child div is not a starter, hence the "- 1" here and the
    # starter loop starting at the second div.
    starter_divs <- length(node_text(doc, "//div[@id=\"pp-horse-data\"]/div"))
    field_size <- clear_text(starter_divs - 1)
    writeLines(
      paste(
        c(record_type, race_type, race_date, race_no, surface, distance, purse, race_text, field_size),
        collapse = ","
      ),
      fileConn
    )

    # 5. Wager records (track_code, race_date, race_no as above)
    record_type <- '"E"'
    wager_list <- node_text(doc, "//span[@class=\"wagers\"]")[1]
    wager_list <- strsplit(wager_list, "[,]")
    for (wager_t in wager_list[[1]]) {
      for (wager in strsplit(wager_t, "[/]")[[1]]) {
        writeLines(
          paste(c(record_type, track_code, race_date, race_no, clear_text(wager)), collapse = ","),
          fileConn
        )
      }
    }

    # 2. Starter records
    # seq_len(n)[-1] is 2..n and empty when there are fewer than two divs
    # (2:n would count backwards in that case).
    for (i in seq_len(starter_divs)[-1]) {
      xpath_temp <- paste0("//div[@id=\"pp-horse-data\"]/div[", i, "]")
      record_type <- '"S"'
      horse_name <- first_attr(doc, xpath_temp, "data-name")
      print(horse_name)
      main_track_xpath <- paste0("//div[@id=\"pp-horse-data\"]/div[", i, "]/div/div/div/div/i")
      if (nchar(clear_text(node_text(doc, main_track_xpath)[1])) > 6) {
        horse_name <- paste(horse_name, "Main track Only")
        print(horse_name)
      }
      horse_name <- clear_text(horse_name)
      # race_no, race_date, track_code: as above
      sire <- clear_text(first_attr(doc, xpath_temp, "data-sire"))

      # The dam attribute has the form "Dam name (...)"; the dam's sire is
      # taken from the parenthesised part.
      dam <- first_attr(doc, xpath_temp, "data-dam")
      open_parens <- gregexpr("\\(", dam)[[1]]
      close_parens <- gregexpr("\\)", dam)[[1]]
      # Reset for every starter so a value from the previous horse can never
      # leak into this record when none of the branches below applies.
      dam_sire <- clear_text("")
      if (length(open_parens) == 1) {
        dam_sire <- clear_text(substring(dam, open_parens[1] + 1, close_parens[1] - 1))
      } else if (length(open_parens) == 2 || length(open_parens) == 3) {
        dam_sire <- clear_text(substring(dam, open_parens[2] + 1, rev(close_parens)[1] - 1))
        if (nchar(dam_sire) <= 6) {
          dam_sire <- clear_text(substring(dam, open_parens[1] + 1, rev(close_parens)[1] - 1))
        }
      }
      # Cut the parenthesised part off the dam name; a name without any
      # parenthesis is kept whole instead of being emptied.
      if (isTRUE(open_parens[1] > 0)) {
        dam <- substring(dam, 0, open_parens[1] - 2)
      }
      dam <- clear_text(dam)

      # "sex|age|birth month|foaling area-..." with the word "Age" removed
      sex_age <- gsub("Age", "", first_attr(doc, xpath_temp, "data-sexagestring"))
      sex_age_parts <- strsplit(sex_age, "[|]")[[1]]
      birth_month <- clear_text(sex_age_parts[3])
      horse_sex <- clear_text(sex_age_parts[1])
      foaling_area <- clear_text(strsplit(sex_age_parts[4], "[-]")[[1]][1])
      horse_age <- clear_text(sex_age_parts[2])
      if (regexpr("Filly", horse_sex) != -1 || regexpr("Mare", horse_sex) != -1) {
        horse_gender <- clear_text("Female")
      } else {
        horse_gender <- clear_text("Male")
      }

      jockey <- clear_text(node_text(doc, xpath_temp, "/div[@class=\"col jockey\"]")[1])
      trainer <- clear_text(node_text(doc, xpath_temp, "/div[@class=\"col trainer\"]")[1])
      owner <- clear_text(first_attr(doc, xpath_temp, "data-owner"))
      breeder <- clear_text(first_attr(doc, xpath_temp, "data-breeder"))
      weight <- clear_text(gsub("lbs", "", first_attr(doc, xpath_temp, "data-weight")))
      morning_line_odds <- clear_text(first_attr(doc, xpath_temp, "data-ml"))
      post <- clear_text(i - 1)
      program_no <- clear_text(first_attr(doc, xpath_temp, "data-programnumber"))
      claiming_price <- clear_text(first_attr(doc, xpath_temp, "data-tagdisplay"))
      equipment <- clear_text(first_attr(doc, xpath_temp, "data-equipment"))
      running_style <- clear_text(first_attr(doc, xpath_temp, "data-runningstyle"))
      data_id <- first_attr(doc, xpath_temp, "data-id")
      writeLines(
        paste(
          c(
            record_type, race_no, race_date, track_code, horse_name, sire, dam, dam_sire,
            horse_age, birth_month, horse_sex, horse_gender, foaling_area, jockey, trainer,
            owner, breeder, weight, morning_line_odds, post, program_no, claiming_price,
            equipment, running_style
          ),
          collapse = ","
        ),
        fileConn
      )

      # 4. Past performance records
      # The same endpoint is queried twice: doc_t holds the table requested
      # with adjusted = "false", doc_t_t the one with adjusted = "true".
      record_type <- '"P"'
      pp_url <- "https://timeformus.com/basicpps/starterPastPerformances2Ajax"
      r_t <- POST(
        pp_url,
        body = list(
          thisHourse = "true", accrued = "true", adjusted = "false",
          weightOn = "false", paceFigOn = "false", starterId = data_id
        ),
        encode = "form",
        add_headers("X-Requested-With" = "XMLHttpRequest")
      )
      doc_t <- htmlParse(content(r_t, "text"))
      r_t_t <- POST(
        pp_url,
        body = list(
          thisHourse = "true", accrued = "true", adjusted = "true",
          weightOn = "false", paceFigOn = "false", starterId = data_id
        ),
        encode = "form",
        add_headers("X-Requested-With" = "XMLHttpRequest")
      )
      doc_t_t <- htmlParse(content(r_t_t, "text"))

      if (length(node_text(doc_t, "//div[@class=\"unraced-horse\"]")) != 0) {
        writeLines(paste(c(record_type, "\"UNRACED \"", horse_name), collapse = ","), fileConn)
      } else {
        pp_rows <- length(node_text(doc_t, "//table/tbody/tr"))
        # Rows 2..n are processed; nothing runs when the table has fewer than
        # two rows.
        for (pp_row in seq_len(pp_rows)[-1]) {
          xpath_string <- paste0("//table/tbody/tr[", pp_row, "]")
          # Trace output carried over from the original script.
          temp_year <- node_text(doc_t, "//table/tbody/tr[2]/td[12]")
          print("Link")
          print(temp_year)

          track_code_t <- clear_text(
            node_text(doc_t, xpath_string, "/td[@class=\"no-wrap track-race-date\"]/div/div[2]")
          )
          race_date_t <- node_text(doc_t, xpath_string, "/td[@class=\"no-wrap track-race-date\"]/div/div[3]")
          # race_int flags rows whose date cell contains "|"; for those the
          # first call position is moved to the fifth slot further down.
          race_int <- FALSE
          if (regexpr("[|]", race_date_t) != -1) {
            race_date_t <- strsplit(race_date_t, "[|]")[[1]][2]
            race_int <- TRUE
          }
          race_date_t <- clear_text(race_date_t)

          # Information record
          info <- node_text(doc_t, xpath_string, "/td[@class=\"no-wrap track-race-date\"]/div/div[1]/span[2]")

          # Finding year indicator
          # NOTE(review): the 5-character substr() and the "\r\n " sentinel
          # are kept exactly as in the original; they depend on the page
          # layout, and changing them would change which "I" records are
          # written.
          temp_year <- node_text(doc_t, xpath_string, "/td[@class=\"no-wrap track-race-date\"]")
          year_pos <- regexpr("20\\d\\d", temp_year)
          temp_year <- substr(temp_year, year_pos, year_pos + 4)
          if (temp_year != "\r\n ") {
            if (year != temp_year) {
              writeLines(paste(c('"I"', temp_year, ""), collapse = ","), fileConn)
            }
            year <- temp_year
          }

          if (info != "") {
            # Break multi result apart: peel notes off the end of `info`, one
            # per known marker, until the remaining marker (if any) sits at
            # the start of the text.
            while (marker_pos(info) > 10) {
              out <- clear_text(substr(info, marker_pos(info), nchar(info)))
              writeLines(paste(c('"I"', year, out), collapse = ","), fileConn)
              info <- substr(info, 0, rev(gregexpr(" - ", info)[[1]])[1])
            }
            print(paste("info", info))
            info <- gsub(" - ", "", info)
            info <- clear_text(substring(info, marker_pos(info), nchar(info)))
            writeLines(paste(c('"I"', year, info), collapse = ","), fileConn)
          }

          race_speed_fig <- clear_text(node_text(doc_t, xpath_string, "/td[@class=\"pps-class-rating\"]"))
          # Paragraphs of the race-details pop-up in the first "no-wrap" cell
          modal <- "/td[@class=\"no-wrap\"][1]/div[@class=\"diagram-info modal\"]/div"
          race_number <- clear_text(gsub("Race", "", node_text(doc_t, xpath_string, modal, "/p[1]")))
          race_type <- clear_text(node_text(doc_t, xpath_string, modal, "/p[3]"))
          claiming_price <- clear_text(
            node_text(doc_t, xpath_string, "/td[@class=\"no-wrap\"][1]/div[1]/div[2]/span[1]")
          )
          distance <- clear_text(node_text(doc_t, xpath_string, modal, "/p[4]"))
          surface <- clear_text(node_text(doc_t, xpath_string, modal, "/p[2]"))
          surface_condition <- clear_text(
            node_text(doc_t, xpath_string, "/td[@class=\"no-wrap\"][1]/div[1]/div[2]/span[3]")
          )
          purse <- clear_text(node_text(doc_t, xpath_string, modal, "/p[5]"))
          race_text <- clear_text(node_text(doc_t, xpath_string, modal, "/p[6]"))

          # "weight|equipment|odds"; clear_text() has already quoted the whole
          # string, so the stray quote is stripped from the outer pieces.
          temp <- clear_text(node_text(doc_t, xpath_string, "/td[@class=\"no-wrap\"][2]/div[2]"))
          temp_parts <- strsplit(temp, "[|]")[[1]]
          weight <- clear_text(gsub('"', '', temp_parts[1]))
          equipment <- clear_text(temp_parts[2])
          odds <- clear_text(gsub('"', "", temp_parts[3]))
          jockey <- clear_text(node_text(doc_t, xpath_string, "/td[@class=\"no-wrap\"][2]/div[1]"))

          post_position <- clear_text(
            gsub("P", "", node_text(doc_t, xpath_string, "/td[@class=\"no-wrap past-rating\"]/div/div[1]"))
          )
          field_size <- clear_text(
            gsub("F", "", node_text(doc_t, xpath_string, "/td[@class=\"no-wrap past-rating\"]/div/div[2]"))
          )

          # Call positions and beaten lengths 1-5 come from rating cells 2-6;
          # slot 6 is always the "-" placeholder.
          positions <- vapply(
            2:6,
            function(col) clear_text(rating_cell(doc_t, xpath_string, col, "/div/b")),
            character(1)
          )
          beaten_lengths <- vapply(
            2:6,
            function(col) {
              clear_text(gsub("[^0-9A-Za-z///' ]", "", rating_cell(doc_t, xpath_string, col, "/div/sup")))
            },
            character(1)
          )
          positions <- c(positions, '"-"')
          beaten_lengths <- c(beaten_lengths, '"-"')

          # Fractions 1-6 come from rating cells 2-7 of each table.
          fractions_off <- vapply(
            2:7,
            function(col) clear_text(rating_cell(doc_t, xpath_string, col, "/div[2]")),
            character(1)
          )
          fractions_adj <- vapply(
            2:7,
            function(col) clear_text(rating_cell(doc_t_t, xpath_string, col, "/div[2]")),
            character(1)
          )

          speed_fig <- clear_text(node_text(doc_t, xpath_string, "/td[11]/div"))
          company_line <- clear_text(
            node_text(doc_t, xpath_string, "/td[@class=\"no-wrap winner-places\"]/a/div")
          )
          comment <- clear_text(node_text(doc_t, xpath_string, "/td/div[@class=\"hide\"]"))

          if (race_int == TRUE) {
            positions[5] <- positions[1]
            beaten_lengths[5] <- beaten_lengths[1]
            positions[1] <- ""
            beaten_lengths[1] <- ""
          }

          writeLines(
            paste(
              c(
                record_type, track_code_t, horse_name, jockey, race_date_t, race_speed_fig,
                race_number, race_type, claiming_price, distance, surface, surface_condition,
                purse, race_text, weight, equipment, odds, post_position, field_size,
                # position_1, beaten_length_1, ..., position_6, beaten_length_6
                as.vector(rbind(positions, beaten_lengths)),
                fractions_off, fractions_adj,
                speed_fig, company_line, comment
              ),
              collapse = ","
            ),
            fileConn
          )
        }
      }

      # 6. Workout records
      url_t <- paste0("https://timeformus.com/basicpps/starterWorkouts?starterId=", data_id)
      r_t <- GET(url_t, add_headers("X-Requested-With" = "XMLHttpRequest"))
      temp_t <- content(r_t, "text")
      doc_t <- htmlParse(temp_t)
      record_type <- '"W"'
      # horse_name: same as in the starter record
      # NOTE(review): `year` is also the state of the past-performance year
      # tracking above; this reset is kept from the original - confirm the
      # intent before changing it.
      year <- '2016'
      # Items are walked in reverse page order; an item containing four
      # digits sets the year used for the workout lines that follow it.
      for (workout in rev(node_text(doc_t, "//div[@class=\"content\"]/section/ul/li[not(@class)]"))) {
        year_pos <- regexpr("\\d{4}", workout)
        if (year_pos != -1) {
          year <- substring(workout, year_pos, year_pos + 4)
        } else {
          words <- strsplit(workout, "[ ]")[[1]]
          date <- clear_text(
            as.Date(strptime(paste(c(words[1], year), collapse = "-"), format = "%b%d-%Y"))
          )
          track_code_t <- clear_text(words[2])
          surface <- clear_text(words[3])
          # The number of words before the first ":" tells whether the
          # surface takes one or two words.
          head_words <- length(strsplit(strsplit(workout, "[:]")[[1]][1], " ")[[1]])
          if (head_words == 5) {
            surface <- clear_text(paste0(c(words[3], words[4]), collapse = " "))
            surface_condition <- clear_text(words[5])
          } else if (head_words == 4) {
            surface_condition <- clear_text(words[4])
          } else {
            surface_condition <- clear_text(words[5])
          }

          # Everything after the first ":" (re-joined when a second ":" split
          # the text into three pieces).
          colon_parts <- strsplit(workout, "[:]")[[1]]
          if (length(colon_parts) == 3) {
            half_part <- paste(c(colon_parts[2], colon_parts[3]), collapse = ":")
          } else {
            half_part <- colon_parts[2]
          }
          distance <- clear_text(strsplit(half_part, "[ ]")[[1]][2])
          time <- clear_text(
            substr(half_part, regexpr("in ", half_part) + 3, regexpr("\\(", half_part) - 1)
          )
          rank <- clear_text(substr(half_part, regexpr("\\(", half_part), nchar(half_part)))
          writeLines(
            paste(
              c(record_type, date, horse_name, track_code_t, surface, surface_condition, distance, time, rank),
              collapse = ","
            ),
            fileConn
          )
        }
      }
    }
  }
}, finally = {
  # Close file
  close(fileConn)
}))
# [Pastebin page chrome, not part of the script]
# Add Comment - Please, Sign In to add comment