Guest User

Untitled

a guest
Oct 19th, 2017
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.78 KB | None | 0 0
  1. houseID year price
  2. 1 1995 NA
  3. 1 1996 100
  4. 1 1997 NA
  5. 1 1998 120
  6. 1 1999 NA
  7. 2 1995 NA
  8. 2 1996 NA
  9. 2 1997 NA
  10. 2 1998 30
  11. 2 1999 NA
  12. 3 1995 NA
  13. 3 1996 44
  14. 3 1997 NA
  15. 3 1998 NA
  16. 3 1999 NA
  17.  
  18. houseID year price
  19. 1 1995 NA
  20. 1 1996 100
  21. 1 1997 100
  22. 1 1998 120
  23. 1 1999 120
  24. 2 1995 NA
  25. 2 1996 NA
  26. 2 1997 NA
  27. 2 1998 30
  28. 2 1999 30
  29. 3 1995 NA
  30. 3 1996 44
  31. 3 1997 44
  32. 3 1998 44
  33. 3 1999 44
  34.  
  35. # Number of houses
  36. N = 15
  37.  
  38. # Data frame
  39. df = data.frame(houseID = rep(1:N,each=10), year=1995:2004, price =ifelse(runif(10*N)>0.15, NA,exp(rnorm(10*N))))
  40.  
  41. library(dplyr)
  42. library(zoo)
  43.  
  44. df %>% group_by(houseID) %>% na.locf %>% ungroup
  45.  
  46. Source: local data frame [15 x 3]
  47. Groups: houseID
  48.  
  49. houseID year price
  50. 1 1 1995 NA
  51. 2 1 1996 100
  52. 3 1 1997 100
  53. 4 1 1998 120
  54. 5 1 1999 120
  55. 6 2 1995 NA
  56. 7 2 1996 NA
  57. 8 2 1997 NA
  58. 9 2 1998 30
  59. 10 2 1999 30
  60. 11 3 1995 NA
  61. 12 3 1996 44
  62. 13 3 1997 44
  63. 14 3 1998 44
  64. 15 3 1999 44
  65.  
  66. df %>% by(df$houseID, na.locf) %>% rbind_all
  67.  
  68. library(zoo)
  69.  
  70. do.call(rbind, by(df, df$houseID, na.locf))
  71.  
  72. library(zoo)
  73.  
  74. na.locf2 <- function(x) na.locf(x, na.rm = FALSE)
  75. transform(df, price = ave(price, houseID, FUN = na.locf2))
  76.  
  77. library(data.table)
  78. library(zoo)
  79.  
  80. data.table(df)[, na.locf(.SD), by = houseID]
  81.  
  82. library(zoo)
  83.  
  84. z <- read.zoo(df, index = 2, split = 1, FUN = identity)
  85. na.locf(z, na.rm = FALSE)
  86.  
  87. 1 2 3
  88. 1995 NA NA NA
  89. 1996 100 NA 44
  90. 1997 100 NA 44
  91. 1998 120 30 44
  92. 1999 120 30 44
  93.  
  94. library(dplyr)
  95. library(zoo)
  96.  
  97. df %>% read.zoo(index = 2, split = 1, FUN = identity) %>% na.locf(na.rm = FALSE)
  98.  
  99. df <- structure(list(houseID = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
  100. 2L, 3L, 3L, 3L, 3L, 3L), year = c(1995L, 1996L, 1997L, 1998L,
  101. 1999L, 1995L, 1996L, 1997L, 1998L, 1999L, 1995L, 1996L, 1997L,
  102. 1998L, 1999L), price = c(NA, 100L, NA, 120L, NA, NA, NA, NA,
  103. 30L, NA, NA, 44L, NA, NA, NA)), .Names = c("houseID", "year",
  104. "price"), class = "data.frame", row.names = c(NA, -15L))
  105.  
  106. library(dplyr)
  107. library(tidyr)
  108.  
  109. df %>% group_by(houseID) %>% fill(price)
  110. # Source: local data frame [15 x 3]
  111. # Groups: houseID [3]
  112. #
  113. # houseID year price
  114. # (int) (int) (int)
  115. # 1 1 1995 NA
  116. # 2 1 1996 100
  117. # 3 1 1997 100
  118. # 4 1 1998 120
  119. # 5 1 1999 120
  120. # 6 2 1995 NA
  121. # 7 2 1996 NA
  122. # 8 2 1997 NA
  123. # 9 2 1998 30
  124. # 10 2 1999 30
  125. # 11 3 1995 NA
  126. # 12 3 1996 44
  127. # 13 3 1997 44
  128. # 14 3 1998 44
  129. # 15 3 1999 44
  130.  
  131. require(data.table)
  132. setDT(df) ## change it to data.table in place
  133. setkey(df, houseID, year) ## needed for fast join
  134. df.woNA <- df[!is.na(price)] ## version without the NA rows
  135.  
  136. # rolling self-join will return what you want
  137. df.woNA[df, roll=TRUE] ## will match previous year if year not found
  138.  
  139. df %>%
  140. group_by(houseID) %>%
  141. mutate(price_change = cumsum(0 + !is.na(price))) %>%
  142. group_by(price_change, add = TRUE) %>%
  143. mutate(price_filled = nth(price, 1)) %>%
  144. ungroup() %>%
  145. select(-price_change) -> df2
  146.  
  147. > tail(df2, 20)
  148. Source: local data frame [20 x 4]
  149.  
  150. houseID year price price_filled
  151. 1 14 1995 NA NA
  152. 2 14 1996 NA NA
  153. 3 14 1997 NA NA
  154. 4 14 1998 NA NA
  155. 5 14 1999 0.8374778 0.8374778
  156. 6 14 2000 NA 0.8374778
  157. 7 14 2001 NA 0.8374778
  158. 8 14 2002 NA 0.8374778
  159. 9 14 2003 2.1918880 2.1918880
  160. 10 14 2004 NA 2.1918880
  161. 11 15 1995 NA NA
  162. 12 15 1996 0.3982450 0.3982450
  163. 13 15 1997 NA 0.3982450
  164. 14 15 1998 1.7727000 1.7727000
  165. 15 15 1999 NA 1.7727000
  166. 16 15 2000 NA 1.7727000
  167. 17 15 2001 NA 1.7727000
  168. 18 15 2002 7.8636329 7.8636329
  169. 19 15 2003 NA 7.8636329
  170. 20 15 2004 NA 7.8636329
  171.  
  172. prices$price <-unlist(lapply(split(prices$price,prices$houseID),function(x) na.locf(x,na.rm=FALSE)))
  173.  
  174. prices
  175. houseID year price
  176. 1 1 1995 NA
  177. 2 1 1996 100
  178. 3 1 1997 100
  179. 4 1 1998 120
  180. 5 1 1999 120
  181. 6 2 1995 NA
  182. 7 2 1996 NA
  183. 8 2 1997 NA
  184. 9 2 1998 30
  185. 10 2 1999 30
  186. 11 3 1995 NA
  187. 12 3 1996 44
  188. 13 3 1997 44
  189. 14 3 1998 44
  190. 15 3 1999 44
Add Comment
Please, Sign In to add comment