Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# IoT Energy Analytics - Review of Submetering
#
# Loads the household power consumption export and keeps an untouched copy of
# it in `energy_iot` for the rest of the script.

# Install only the packages that are not available yet, so re-running the
# script does not reinstall them every time.
required_packages <- c("dplyr", "tidyr", "devtools")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
library(dplyr)
library(tidyr)
library(devtools)
library(readr)

# Data install ----
# NOTE(review): this working directory is machine-specific. It is kept because
# the input file and every CSV export in this script use relative paths.
setwd("~/Desktop/IOT Analytics")
power_file <- "household_power_consumption.txt"

# The file is parsed with ";" as separator by read.table() below, so the readr
# import has to use the same delimiter; read_csv() would split on commas.
# "?" is mapped to NA -- presumably the file's missing-value marker
# (TODO confirm against the dataset description).
household_power_consumption <- read_delim(
  power_file,
  delim = ";",
  na = c("", "NA", "?")
)
if (interactive()) View(household_power_consumption)
attributes(household_power_consumption)

# Base-R import used by the rest of the script. Declaring "?" as NA keeps the
# measurement columns numeric instead of text; stringsAsFactors = FALSE keeps
# Date and Time as plain character on every R version.
housevs2 <- read.table(
  power_file,
  header = TRUE,
  sep = ";",
  fill = TRUE,
  na.strings = c("NA", "?"),
  stringsAsFactors = FALSE
)
energy_iot <- housevs2
if (interactive()) View(energy_iot)
summary(energy_iot)
str(energy_iot)
# One column for date and time stamp ----
# Build `energy_combined` with a leading "DateTime" text column of the form
# "<Date> <Time>", followed by every column of `energy_iot` in its original
# order. Naming the column at construction avoids renaming it through a
# hard-coded column position and then reordering by index arithmetic.
energy_combined <- data.frame(
  DateTime = paste(energy_iot$Date, energy_iot$Time),
  energy_iot,
  stringsAsFactors = FALSE,
  check.names = FALSE
)
head(energy_combined)
if (interactive()) View(energy_combined)
# Convert date and time ----
library(dplyr)

# Parse the combined stamp straight to POSIXct (POSIXlt is a list and does not
# belong in a data frame column). A fixed time zone makes the result
# independent of the machine's locale and avoids NA for wall-clock times that
# do not exist in a zone with daylight saving.
# NOTE(review): the stamps are treated as plain wall-clock labels; confirm
# that UTC is acceptable for this data.
energy_combined$DateTime <- as.POSIXct(
  as.character(energy_combined$DateTime),
  format = "%d/%m/%Y %H:%M:%S",
  tz = "UTC"
)

# `Date` stays a plain Date. The day filters further down compare it with
# "YYYY-MM-DD" strings; a POSIXct built from a Date is midnight UTC, which
# equals such a string only when the session time zone happens to be UTC.
energy_combined$Date <- as.Date(
  as.character(energy_combined$Date),
  format = "%d/%m/%Y"
)

# Attributes redefined ----
# The sub-meter readings are measurements, so they are stored as numbers.
# As factors, ts() would forecast the level codes instead of the readings.
# Going through as.character() also handles columns that arrived as text or
# factor; values that are not numbers become NA.
submeter_cols <- c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3")
energy_combined[submeter_cols] <- lapply(
  energy_combined[submeter_cols],
  function(reading) suppressWarnings(as.numeric(as.character(reading)))
)

str(energy_combined)
summary(energy_combined)
# Descriptive stats ----
summary(energy_combined)

# Feature engineering ----
# Drop the four household-level measurement columns in one step; every other
# column is kept in its current order.
energy_combined <- energy_combined[
  setdiff(
    names(energy_combined),
    c(
      "Global_active_power", "Global_reactive_power",
      "Voltage", "Global_intensity"
    )
  )
]
# Df Quarter1 2007 ----
# Install lubridate only when it is missing, so the script can be re-run
# without reinstalling the package each time.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
library(lubridate)
dim(energy_combined)
str(energy_combined)
# Subset data frames: one sample window per season and year ----

# Days of the month included in every sample window.
# NOTE(review): day 2 is absent, exactly as in the original filters. If ten
# consecutive days were intended, change this to 1:10.
sample_days <- c(1, 3:10)

#' Rows of `data` whose `Date` falls on the given days of one month
#'
#' @param data A data frame with a `Date` column (Date or POSIXct).
#' @param year,month Calendar year and month number of the window.
#' @param days Days of the month to keep.
#' @return The matching rows of `data`, with all columns.
subset_sample_days <- function(data, year, month, days = sample_days) {
  wanted <- as.Date(sprintf(
    "%04d-%02d-%02d",
    as.integer(year), as.integer(month), as.integer(days)
  ))
  # Compare calendar dates, not instants: the match then does not depend on
  # the session time zone. %in% never yields NA, so rows with a missing Date
  # are simply left out.
  data[as.Date(data$Date, tz = "UTC") %in% wanted, , drop = FALSE]
}

# 2007: March, August, December
DF2007_march <- subset_sample_days(energy_combined, 2007, 3)
DF2007_august <- subset_sample_days(energy_combined, 2007, 8)
DF2007_dec <- subset_sample_days(energy_combined, 2007, 12)

# 2008: March, August, December
DF2008_march <- subset_sample_days(energy_combined, 2008, 3)
DF2008_august <- subset_sample_days(energy_combined, 2008, 8)
DF2008_dec <- subset_sample_days(energy_combined, 2008, 12)

# 2009: March, August, December
DF2009_march <- subset_sample_days(energy_combined, 2009, 3)
DF2009_august <- subset_sample_days(energy_combined, 2009, 8)
DF2009_dec <- subset_sample_days(energy_combined, 2009, 12)

# 2010: March, August, November
DF2010_march <- subset_sample_days(energy_combined, 2010, 3)
DF2010_august <- subset_sample_days(energy_combined, 2010, 8)
DF2010_nov <- subset_sample_days(energy_combined, 2010, 11)

# Open the viewers only in an interactive session. The March 2008 viewer now
# shows DF2008_march; the name it used before was never defined.
if (interactive()) {
  View(DF2007_march)
  View(DF2007_august)
  View(DF2007_dec)
  View(DF2008_march)
  View(DF2008_august)
  View(DF2008_dec)
  View(DF2009_march)
  View(DF2009_august)
  View(DF2009_dec)
  View(DF2010_march)
  View(DF2010_august)
  View(DF2010_nov)
}
# Remove variable noise for August ----
# Keep only the columns of the August 2007 frame that are not on the removal
# list below; names on the list that are already absent are ignored.
DF2007_august <- DF2007_august[
  !(names(DF2007_august) %in% c(
    "Sub_metering_2", "Sub_metering_3", "DateTime", "Global_intensity",
    "Global_active_power", "Global_reactive_power", "Date", "Time"
  ))
]
View(DF2007_august)
str(DF2007_august)
glimpse(DF2007_august)
# Forecast for August 2007 (sample days in August, sub-meter 1) ----
# forecast() and tslm() come from the forecast package; the original
# library("") call named no package and stopped the script here.
library(forecast)
if (interactive()) {
  print(help("tslm"))
  print(help("forecast"))
}

Fore2007 <- ts(DF2007_august)
plot(Fore2007)
if (interactive()) View(Fore2007)
summary(Fore2007)

# forecast() needs the series as its first argument; the interval levels and
# lambda match the other forecast() calls in this script.
Augustfore_2007 <- forecast(Fore2007, level = c(80, 95), lambda = NULL)
Augustfore_2007
# Plot the forecast object itself; the plain series has no forecast plot.
plot(Augustfore_2007)
augustoutput <- as.data.frame(Augustfore_2007)
write_excel_csv(augustoutput, "August2007.csv")
# Forecast focusing on sub-meter 1 for August 2007 ----
# Make sure the other two sub-meter columns are not in the August frame.
DF2007_august <- DF2007_august[
  setdiff(names(DF2007_august), c("Sub_metering_2", "Sub_metering_3"))
]
View(Fore2007)
plot(Fore2007)

# Forecast the existing series with 80% and 95% intervals; the parentheses
# print the result as it is assigned.
(Aug2007_sub1 <- forecast(Fore2007, lambda = NULL, level = c(80, 95)))
Augsub1 <- as.data.frame(Aug2007_sub1)
write_excel_csv(Augsub1, "augsub1.csv")

# Redraw the earlier August 2007 forecast object (this step was headed
# "Voltage August 2007" in the original).
plot(Augustfore_2007)
# Forecast time series #2 (December 2008, winter) ----
# Drop every timestamp column before building the series. `Time` has to go as
# well: ts() coerces a data frame to a numeric matrix, so a clock-time column
# would be forecast as if it were a measurement. The March 2009 section
# removes the same three columns before its ts() call.
DF2008_dec <- DF2008_dec[
  setdiff(names(DF2008_dec), c("DateTime", "Date", "Time"))
]
Fore2008 <- ts(DF2008_dec)

# Report how many values are missing instead of printing the full logical
# matrix, one row per observation.
sum(is.na(Fore2008))
plot(Fore2008)
if (interactive()) View(Fore2008)

Decfore_2008 <- forecast(Fore2008, level = c(80, 95), lambda = NULL)
Decfore_2008
plot(Decfore_2008)
decoutput <- as.data.frame(Decfore_2008)
write_excel_csv(decoutput, "Dec2008.csv")
# December 2008, focus on sub-meter 2 ----
DF2008_dec <- DF2008_dec[
  setdiff(names(DF2008_dec), c("Sub_metering_3", "Sub_metering_1"))
]

# Build the series from the sub-meter 2 column itself. `Fore2008` was created
# before the two columns above were removed, so forecasting it again would
# only repeat the earlier all-column forecast. as.character() makes the
# conversion work whether the column is numeric, text or factor.
sub2_series_2008 <- ts(as.numeric(as.character(DF2008_dec$Sub_metering_2)))
dec2008_sub2 <- forecast(sub2_series_2008, level = c(80, 95), lambda = NULL)
plot(dec2008_sub2)
dec2008_sub2
decsub2 <- as.data.frame(dec2008_sub2)
write_excel_csv(decsub2, "dec2008sub2.csv")
# December 2008: rebuild the series from the reduced frame ----
# Strip the listed columns from the December 2008 frame (names that are
# already absent are ignored), then recreate and plot the series from what
# is left.
DF2008_dec <- DF2008_dec[
  !(names(DF2008_dec) %in% c(
    "Global_active_power", "Global_reactive_power",
    "Global_intensity", "Time"
  ))
]
Fore2008 <- ts(DF2008_dec)
plot(Fore2008)
# Forecast time series #3 (March 2009, spring) ----
# Remove the three timestamp columns so that only measurement columns go
# into the series.
DF2009_march <- DF2009_march[
  setdiff(names(DF2009_march), c("DateTime", "Date", "Time"))
]
Fore2009 <- ts(DF2009_march)
View(Fore2009)
plot(Fore2009)

# Forecast with 80% and 95% intervals; the parentheses print the result as
# it is assigned.
(Marchfore_2009 <- forecast(Fore2009, lambda = NULL, level = c(80, 95)))
marchustoutput <- as.data.frame(Marchfore_2009)
write_excel_csv(marchustoutput, "March 2009.csv")
# March 2009, focus on sub-meter 3 ----
DF2009_march <- DF2009_march[
  setdiff(names(DF2009_march), c("Sub_metering_1", "Sub_metering_2"))
]

# Build the series from the sub-meter 3 column itself. `Fore2009` was created
# before the two columns above were removed, so forecasting it again would
# only repeat the earlier all-column forecast. as.character() makes the
# conversion work whether the column is numeric, text or factor.
sub3_series_2009 <- ts(as.numeric(as.character(DF2009_march$Sub_metering_3)))
MAR2009_sub3 <- forecast(sub3_series_2009, level = c(80, 95), lambda = NULL)
plot(MAR2009_sub3)
# Print this section's own result (the December 2008 object was printed here).
MAR2009_sub3
marsub3 <- as.data.frame(MAR2009_sub3)
write_excel_csv(marsub3, "mar2009sub3.csv")
# March 2009: rebuild the series from the reduced frame ----
# Strip the listed columns from the March 2009 frame (names that are already
# absent are ignored), then recreate and plot the series from what is left.
DF2009_march <- DF2009_march[
  !(names(DF2009_march) %in% c(
    "Global_active_power", "Global_reactive_power", "Global_intensity"
  ))
]
Fore2009 <- ts(DF2009_march)
plot(Fore2009)
# Seasonal decomposition and exponential smoothing ----
#
# NOTE(review): the original calls in this section could not run. They read a
# column that had been dropped from `energy_combined`, passed whole data
# frames or nothing to decompose(), used a series name before defining it,
# and gave ts() character start/end values. The version below is a best-effort
# reading of the intent: each series holds period means of
# Global_active_power taken from the raw import `energy_iot`, starting in the
# named year. Please confirm this matches the intended analysis.
library(dplyr)
library(ggplot2)
if (interactive()) {
  print(help("HoltWinters"))
  print(help("ts"))
}

#' Monthly or quarterly mean of one raw measurement as a time series
#'
#' @param raw Data frame with a `Date` column in "%d/%m/%Y" text form.
#' @param value_col Name of the measurement column to average.
#' @param start_year First calendar year to include; the series starts in
#'   its first period.
#' @param frequency 12 for monthly means, 4 for quarterly means.
#' @return A `ts` running from `start_year` to the last period that has
#'   data; a period without any usable reading is NA.
period_mean_ts <- function(raw, value_col, start_year, frequency = 12) {
  stopifnot(
    is.data.frame(raw),
    value_col %in% names(raw),
    frequency %in% c(4, 12)
  )
  stamp <- as.POSIXlt(as.Date(as.character(raw$Date), format = "%d/%m/%Y"))
  # Values that are not numbers (for example a missing-value marker) turn
  # into NA here and are left out of the means.
  value <- suppressWarnings(as.numeric(as.character(raw[[value_col]])))
  year <- stamp$year + 1900
  keep <- !is.na(year) & !is.na(value) & year >= start_year
  if (!any(keep)) {
    stop(
      "no usable '", value_col, "' readings from ", start_year, " onwards",
      call. = FALSE
    )
  }
  # Zero-based position of each reading's period, counted from the first
  # period of start_year ($mon is the zero-based month).
  months_per_period <- 12 / frequency
  period_index <- (year[keep] - start_year) * frequency +
    stamp$mon[keep] %/% months_per_period
  # Listing every period as a factor level keeps the series regular: a period
  # with no readings shows up as NA instead of silently shifting later ones.
  every_period <- seq.int(0, max(period_index))
  period_mean <- tapply(
    value[keep],
    factor(period_index, levels = every_period),
    mean
  )
  ts(as.numeric(period_mean), start = c(start_year, 1), frequency = frequency)
}

#' Components of a decompose() result as one row per time point
#'
#' @param parts Object returned by `decompose()`.
#' @return Data frame with columns time, observed, seasonal, trend, random.
decomposition_frame <- function(parts) {
  data.frame(
    time = as.numeric(time(parts$x)),
    observed = as.numeric(parts$x),
    seasonal = as.numeric(parts$seasonal),
    trend = as.numeric(parts$trend),
    random = as.numeric(parts$random)
  )
}

# decompose() stops when the series is shorter than two full periods, so a
# single calendar year cannot be decomposed at these frequencies. Each series
# therefore runs from its start year through the last period with data.

# Decomposing seasonal trends, series starting 2007 (monthly) ----
householdtimeseries_2007_activepower <- period_mean_ts(
  energy_iot, "Global_active_power",
  start_year = 2007, frequency = 12
)
householdtimeseries_2007_activepower
plot.ts(householdtimeseries_2007_activepower)
season2007 <- decompose(householdtimeseries_2007_activepower, "multiplicative")
season2007
plot(season2007$seasonal)
plot(season2007$random)
plot(season2007$trend)
str(season2007)
summary(season2007)
season2007_df <- decomposition_frame(season2007)
write_excel_csv(season2007_df, "season2007.csv")

# Decomposing seasonal trends, series starting 2008 (quarterly) ----
householdtimeseries_2008 <- period_mean_ts(
  energy_iot, "Global_active_power",
  start_year = 2008, frequency = 4
)
householdtimeseries_2008
plot.ts(householdtimeseries_2008)
season2008 <- decompose(householdtimeseries_2008, "multiplicative")
season2008
plot(season2008$seasonal)
plot(season2008$random)
plot(season2008$trend)
season2008_df <- decomposition_frame(season2008)
write_excel_csv(season2008_df, "season2008.csv")

# Decomposing seasonal trends, series starting 2009 (quarterly) ----
householdtimeseries_2009 <- period_mean_ts(
  energy_iot, "Global_active_power",
  start_year = 2009, frequency = 4
)
householdtimeseries_2009
plot.ts(householdtimeseries_2009)
season2009 <- decompose(householdtimeseries_2009, "multiplicative")
season2009
plot(season2009$seasonal)
plot(season2009$random)
plot(season2009$trend)
season2009_df <- decomposition_frame(season2009)
write_excel_csv(season2009_df, "season2009.csv")

# Holt-Winters for the series starting 2007 ----
# With beta = FALSE and gamma = FALSE this is simple exponential smoothing.
householdtimeseries_2007 <- householdtimeseries_2007_activepower
householdtimeseries_2007

# August to September 2007, cut out of the monthly series. ts() and window()
# take start/end as c(year, period); date strings are not accepted.
august2007 <- window(
  householdtimeseries_2007,
  start = c(2007, 8), end = c(2007, 9)
)
august2007

# Show the first rows only; printing the whole frame floods the console.
head(energy_combined)
if (interactive()) {
  View(energy_combined)
  View(august2007)
}

smooth2007 <- HoltWinters(householdtimeseries_2007, beta = FALSE, gamma = FALSE)
# lines() draws onto the current plot, so the series is plotted first and the
# fitted values are overlaid on it.
plot(householdtimeseries_2007)
lines(fitted(smooth2007)[, 1], col = 3)
plot(smooth2007)
plot(fitted(smooth2007))

# Holt-Winters for the series starting 2008 ----
smmoth2008 <- HoltWinters(householdtimeseries_2008, beta = FALSE, gamma = FALSE)
plot(smmoth2008)

# Holt-Winters for the series starting 2009 ----
smmoth2009 <- HoltWinters(householdtimeseries_2009, beta = FALSE, gamma = FALSE)
plot(smmoth2009)

if (interactive()) {
  View(Fore2008)
  View(DF2008_dec)
  View(DF2009_march)
}
Add Comment
Please, Sign In to add comment