Guest User

Untitled

a guest
Jul 21st, 2018
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.91 KB | None | 0 0
  1. > head(df2)
  2. # A tibble: 20 x 2
  3.  
  4. `Date (US)` Value
  5. <dttm> <dbl>
  6. 1 2016-01-02 02:24:00 243814121.
  7. 2 2016-01-04 16:48:00 243821146.
  8. 3 2016-01-29 21:36:00 243837845.
  9. 4 2016-01-30 21:36:00 243838871.
  10. 5 2016-02-04 21:36:00 243841925.
  11. 6 2016-02-06 04:48:00 243843313.
  12.  
  # Turn irregularly timed observations into a regular series of midnight
  # values, keeping an interpolated value only where a real observation lies
  # within one day of it.
  #
  # NOTE: the original `rm(list = ls())` was dropped. Wiping the caller's
  # workspace is a side effect a script should not have; run this in a fresh
  # R session instead.

  #http://sma.epfl.ch/~lbelzile/math342/pract_sup.html
  library(readxl)
  library(forecast)
  library(imputeTS)
  library(xts)
  library(zoo)
  library(data.table)
  library(xlsx)

  # Import, combine, and sort data ----
  # Both sheets are expected to share the columns `Date (US)` and `Value`.
  # NOTE(review): presumably one sheet holds the midnight time stamps with
  # missing values and the other the irregular observations -- confirm.
  workbook <- "C:/Users/PC/Desktop/Time_Series/upwork-data.xlsx"
  df1 <- read_excel(workbook)
  df2 <- read_excel(workbook, sheet = 2)
  df <- rbind(df1, df2)
  df <- df[order(df$`Date (US)`), ]

  # Plot irregular time series ----
  itimeseries <- as.xts(x = df$Value, order.by = df$`Date (US)`)
  plot(itimeseries, grid.ticks.on = "year", yaxis.right = FALSE)

  # Interpolate ----
  # zoo::na.approx() interpolates against the time index, so unevenly spaced
  # observations are weighted by elapsed time. imputeTS's interpolation works
  # on row positions only, which is wrong for an irregular series.
  # `na.rm = FALSE` keeps one value per input row; `rule = 2` extends the
  # first/last observation to leading/trailing gaps.
  interpolated <- zoo::na.approx(itimeseries, na.rm = FALSE, rule = 2)
  plot(interpolated)

  # Store plain numbers, not an xts object, in the data frame column.
  df$interp <- as.numeric(zoo::coredata(interpolated))

  # Use only those interpolated values where data are available within a day
  # on either side ----
  # "Data" means a row with a real (non-missing) Value; neighbouring rows that
  # are themselves missing must not count as support for an interpolation.
  day_secs <- 24 * 60 * 60
  stamp <- df$`Date (US)`
  n_rows <- length(stamp)
  row_id <- seq_len(n_rows)
  observed <- !is.na(df$Value)

  # Row index of the closest observed row strictly before each row (NA = none).
  last_obs <- cummax(ifelse(observed, row_id, 0L))
  prev_idx <- head(c(0L, last_obs), -1)
  prev_idx[prev_idx == 0L] <- NA

  # Row index of the closest observed row strictly after each row (NA = none).
  first_obs <- rev(cummin(rev(ifelse(observed, row_id, n_rows + 1L))))
  next_idx <- tail(c(first_obs, n_rows + 1L), -1)
  next_idx[next_idx > n_rows] <- NA

  gap_before <- as.numeric(difftime(stamp, stamp[prev_idx], units = "secs"))
  gap_after <- as.numeric(difftime(stamp[next_idx], stamp, units = "secs"))

  # A missing neighbour (start or end of the series) never counts as support.
  has_support <- (!is.na(gap_before) & gap_before < day_secs) |
    (!is.na(gap_after) & gap_after < day_secs)
  df$new <- ifelse(has_support, df$interp, NA_real_)

  # Keep only regular midnight values ----
  # format() honours the time zone stored on the column; strftime() would
  # silently fall back to the local zone of the machine running the script,
  # making the clock time (and this filter) machine- and DST-dependent.
  # NOTE(review): "01:00:00" looks like a workaround for that daylight-saving
  # shift; it is kept so no previously selected row is lost -- confirm whether
  # it is still wanted.
  midnight_clock <- c("00:00:00", "01:00:00")
  df$time <- format(df$`Date (US)`, format = "%H:%M:%S")
  is_midnight <- df$time %in% midnight_clock
  df$new[!is_midnight] <- NA
  df3 <- df[is_midnight, ]

  # Make regular time series of all midnights ----
  timeseries <- as.xts(x = df3$new, order.by = df3$`Date (US)`)
  plot(timeseries, grid.ticks.on = "year", yaxis.right = FALSE)

  # Visualize missing data ----
  # NOTE(review): newer imputeTS releases appear to have renamed this plotting
  # function -- check the installed version if the call is not found.
  plotNA.distributionBar(timeseries)
Add Comment
Please, Sign In to add comment