Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- houseID year price
- 1 1995 NA
- 1 1996 100
- 1 1997 NA
- 1 1998 120
- 1 1999 NA
- 2 1995 NA
- 2 1996 NA
- 2 1997 NA
- 2 1998 30
- 2 1999 NA
- 3 1995 NA
- 3 1996 44
- 3 1997 NA
- 3 1998 NA
- 3 1999 NA
- houseID year price
- 1 1995 NA
- 1 1996 100
- 1 1997 100
- 1 1998 120
- 1 1999 120
- 2 1995 NA
- 2 1996 NA
- 2 1997 NA
- 2 1998 30
- 2 1999 30
- 3 1995 NA
- 3 1996 44
- 3 1997 44
- 3 1998 44
- 3 1999 44
- # Number of houses
- N = 15
- # Data frame
- df = data.frame(houseID = rep(1:N,each=10), year=1995:2004, price =ifelse(runif(10*N)>0.15, NA,exp(rnorm(10*N))))
- library(dplyr)
- library(zoo)
- df %>% group_by(houseID) %>% na.locf %>% ungroup
- Source: local data frame [15 x 3]
- Groups: houseID
- houseID year price
- 1 1 1995 NA
- 2 1 1996 100
- 3 1 1997 100
- 4 1 1998 120
- 5 1 1999 120
- 6 2 1995 NA
- 7 2 1996 NA
- 8 2 1997 NA
- 9 2 1998 30
- 10 2 1999 30
- 11 3 1995 NA
- 12 3 1996 44
- 13 3 1997 44
- 14 3 1998 44
- 15 3 1999 44
- df %>% by(df$houseID, na.locf) %>% rbind_all
- library(zoo)
- do.call(rbind, by(df, df$houseID, na.locf))
- library(zoo)
- na.locf2 <- function(x) na.locf(x, na.rm = FALSE)
- transform(df, price = ave(price, houseID, FUN = na.locf2))
- library(data.table)
- library(zoo)
- data.table(df)[, na.locf(.SD), by = houseID]
- library(zoo)
- z <- read.zoo(df, index = 2, split = 1, FUN = identity)
- na.locf(z, na.rm = FALSE)
- 1 2 3
- 1995 NA NA NA
- 1996 100 NA 44
- 1997 100 NA 44
- 1998 120 30 44
- 1999 120 30 44
- library(dplyr)
- library(zoo)
- df %>% read.zoo(index = 2, split = 1, FUN = identity) %>% na.locf(na.rm = FALSE)
- df <- structure(list(houseID = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
- 2L, 3L, 3L, 3L, 3L, 3L), year = c(1995L, 1996L, 1997L, 1998L,
- 1999L, 1995L, 1996L, 1997L, 1998L, 1999L, 1995L, 1996L, 1997L,
- 1998L, 1999L), price = c(NA, 100L, NA, 120L, NA, NA, NA, NA,
- 30L, NA, NA, 44L, NA, NA, NA)), .Names = c("houseID", "year",
- "price"), class = "data.frame", row.names = c(NA, -15L))
- library(dplyr)
- library(tidyr)
- df %>% group_by(houseID) %>% fill(price)
- # Source: local data frame [15 x 3]
- # Groups: houseID [3]
- #
- # houseID year price
- # (int) (int) (int)
- # 1 1 1995 NA
- # 2 1 1996 100
- # 3 1 1997 100
- # 4 1 1998 120
- # 5 1 1999 120
- # 6 2 1995 NA
- # 7 2 1996 NA
- # 8 2 1997 NA
- # 9 2 1998 30
- # 10 2 1999 30
- # 11 3 1995 NA
- # 12 3 1996 44
- # 13 3 1997 44
- # 14 3 1998 44
- # 15 3 1999 44
- require(data.table)
- setDT(df) ## change it to data.table in place
- setkey(df, houseID, year) ## needed for fast join
- df.woNA <- df[!is.na(price)] ## version without the NA rows
- # rolling self-join will return what you want
- df.woNA[df, roll=TRUE] ## will match previous year if year not found
- df %>%
- group_by(houseID) %>%
- mutate(price_change = cumsum(0 + !is.na(price))) %>%
- group_by(price_change, add = TRUE) %>%
- mutate(price_filled = nth(price, 1)) %>%
- ungroup() %>%
- select(-price_change) -> df2
- > tail(df2, 20)
- Source: local data frame [20 x 4]
- houseID year price price_filled
- 1 14 1995 NA NA
- 2 14 1996 NA NA
- 3 14 1997 NA NA
- 4 14 1998 NA NA
- 5 14 1999 0.8374778 0.8374778
- 6 14 2000 NA 0.8374778
- 7 14 2001 NA 0.8374778
- 8 14 2002 NA 0.8374778
- 9 14 2003 2.1918880 2.1918880
- 10 14 2004 NA 2.1918880
- 11 15 1995 NA NA
- 12 15 1996 0.3982450 0.3982450
- 13 15 1997 NA 0.3982450
- 14 15 1998 1.7727000 1.7727000
- 15 15 1999 NA 1.7727000
- 16 15 2000 NA 1.7727000
- 17 15 2001 NA 1.7727000
- 18 15 2002 7.8636329 7.8636329
- 19 15 2003 NA 7.8636329
- 20 15 2004 NA 7.8636329
- prices$price <-unlist(lapply(split(prices$price,prices$houseID),function(x) na.locf(x,na.rm=FALSE)))
- prices
- houseID year price
- 1 1 1995 NA
- 2 1 1996 100
- 3 1 1997 100
- 4 1 1998 120
- 5 1 1999 120
- 6 2 1995 NA
- 7 2 1996 NA
- 8 2 1997 NA
- 9 2 1998 30
- 10 2 1999 30
- 11 3 1995 NA
- 12 3 1996 44
- 13 3 1997 44
- 14 3 1998 44
- 15 3 1999 44
Add Comment
Please, Sign In to add comment