Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- lapply(total2[c(2,4,6:53),drop=FALSE],as.numeric)
- # Scrapes the NBA 2017 advanced-stats and per-game tables plus the player
- # contracts table from basketball-reference.com, merges them by player name,
- # and converts the statistic columns from character to numeric.
- #
- # Results left in the workspace:
- #   total  - advanced stats merged with per-game stats
- #   total2 - total merged with the contracts table
- library(XML)
- library(ggplot2)
- # Grabs ADVANCED link and assigns df to ADVANCED table.
- # The URL has to be one unbroken string: a line break inside the quotes
- # would become a newline character in the address.
- advanced <- "http://www.basketball-reference.com/leagues/NBA_2017_advanced.html"
- advanced.table <- readHTMLTable(
-   advanced,
-   header = TRUE, which = 1, stringsAsFactors = FALSE
- )
- # Grabs REGULAR link and assigns df to REGULAR table
- reg <- "http://www.basketball-reference.com/leagues/NBA_2017_per_game.html"
- reg.table <- readHTMLTable(
-   reg,
-   header = TRUE, which = 1, stringsAsFactors = FALSE
- )
- # Grabs CONTRACTS link and assigns df to CONTRACTS table
- contracts <- "http://www.basketball-reference.com/contracts/players.html"
- contracts.table <- readHTMLTable(
-   contracts,
-   header = TRUE, which = 1, stringsAsFactors = FALSE
- )
- # Working copies of the three scraped tables
- df <- advanced.table
- rdf <- reg.table
- cdf <- contracts.table
- # Renames the 8th per-game column to MPG so it cannot be confused with the
- # MP column of the advanced table after merging.
- # NOTE(review): positional rename; assumes column 8 is MP - confirm against
- # the current page layout.
- colnames(rdf)[8] <- "MPG"
- # Drops the header rows that the site repeats inside the table body
- df <- df[!grepl("Player", df$Player), ]
- rdf <- rdf[!grepl("Player", rdf$Player), ]
- cdf <- cdf[!grepl("Player", cdf$Player), ]
- cdf <- cdf[!grepl("Salary", cdf$Player), ]
- # Columns already present in df; dropped from rdf and cdf so the merges do
- # not create duplicated .x/.y columns.
- drops <- c("Pos", "Age", "Tm", "G", "Rk")
- rdf <- rdf[, !(names(rdf) %in% drops)]
- cdf <- cdf[, !(names(cdf) %in% drops)]
- # Merges df and rdf, then merges the result with cdf to create total2.
- # NOTE(review): merging on Player alone cross-joins any player that has
- # more than one row in both tables - verify whether that occurs here.
- total <- merge(df, rdf, by = c("Player"))
- total2 <- merge(total, cdf, by = c("Player"))
- # Converts selected columns from character to numeric.
- # Columns 1, 3 and 5 are skipped (presumably Player, Pos and Tm - confirm),
- # and in total2 everything after column 53 is left as character.
- # Single-index (list-style) subsetting of a data frame always returns a
- # data frame, so `drop = FALSE` is not needed; R ignores it with a warning.
- numeric_cols <- c(2, 4, 6:53)
- total[numeric_cols] <- lapply(total[numeric_cols], as.numeric)
- total2[numeric_cols] <- lapply(total2[numeric_cols], as.numeric)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement