CJamie

ass8

Oct 12th, 2021
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.14 KB | None | 0 0
  1.  
  2.  
  3. # Step 0: Start; Getting the starting time
  4. cat("\nSTART\n")
  5. startTime = proc.time()[3]
  6. startTime
  7.  
  8.  
  9.  
  10. # Step 1: Include Library
  11. cat("\nStep 1: Library Inclusion")
  12.  
  13.  
  14.  
  15. # Step 2: Variable Declaration and Seed Setting
  16. cat("\nStep 2: Variable Declaration and Seed Setting")
  17. modelName <- "linear"
  18. modelName
  19. InputDataFileName="regressionDataSet.csv"
  20. InputDataFileName
  21.  
  22. seed=sample(99999:999999, 1)
  23. seed
  24. set.seed(seed)
  25.  
  26.  
  27.  
  28. # Step 3: Data Division for train/test
  29. cat("\nStep 3: Data Division")
  30. training=50 # define percentage
  31.  
  32.  
  33.  
  34. # Step 4: Load the data from file
  35. cat("\nStep 4: Loading the data")
  36. dataset <- read.csv(InputDataFileName) # Read the datafile
  37. head(dataset) # Show Top 6 records
  38. nrow(dataset) # Show number of records
  39. names(dataset) # Show fields names or columns names
  40.  
  41.  
  42.  
  43. # Step 5: Count total number of observations/rows.
  44. cat("\nStep 5: Counting dataset")
  45. totalDataset <- nrow(dataset)
  46. totalDataset
  47.  
  48.  
  49.  
  50. # Step 6: Choose Target variable
  51. cat("\nStep 6: Choose Target Variable")
  52. target <- names(dataset)[1] # i.e. RMSD
  53. target
  54.  
  55.  
  56.  
  57. # Step 7: Choose inputs Variables
  58. cat("\nStep 7: Choose Inputs Variable")
  59. inputs <- setdiff(names(dataset),target)
  60. inputs
  61. length(inputs)
  62.  
  63.  
  64. selectedInputs <- inputs
  65.  
  66.  
  67. # Step 8: Select Training Data Set ( Select random indices )
  68. cat("\nStep 8: Select training dataset")
  69. trainSample <- sample(totalDataset, totalDataset * training/100)
  70. head(trainSample) # Show Top 6 indecies
  71. length(trainSample) # Show number of indecies
  72.  
  73. trainDataset <- dataset[trainSample,c(selectedInputs, target)]
  74. head(trainDataset) # Show Top 6 records
  75. nrow(trainDataset) # Show number of train Dataset
  76.  
  77.  
  78.  
  79. # Step 9: Select Testing Data Set
  80. cat("\nStep 9: Select testing dataset")
  81. testSample <- setdiff(seq_len(nrow(dataset)), trainSample)
  82. head(testSample) # Show Top 6 indecies
  83. length(testSample) # Show number of indecies
  84.  
  85. testDataset <- dataset[testSample,c(selectedInputs, target)]
  86. head(testDataset)
  87. nrow(testDataset)
  88.  
  89.  
  90.  
  91.  
  92. # Step 10: Regression Model Building: Training
  93. cat("\nStep 10: Model Building -> ", modelName)
  94. formula <- as.formula(paste(target,"~",paste(c(selectedInputs),collapse = "+")))
  95. formula
  96.  
  97. model <- lm(formula, trainDataset)
  98. model
  99. summary(model)
  100.  
  101.  
  102. # Step 11: Prediction (Testing)
  103. cat("\nStep 11: Prediction using -> ", modelName)
  104. Predicted <- predict(model, testDataset)
  105. head(Predicted)
  106.  
  107.  
  108.  
  109. # Step 12: Extracting Actual
  110. cat("\nStep 12: Extracting Actual")
  111. Actual <- as.double(unlist(testDataset[target]))
  112. head(Actual)
  113.  
  114.  
  115.  
  116.  
  117. # Step 13: Model Evaluation
  118. cat("\nStep 13: Model Evaluation")
  119.  
  120. # Step 13.1: Correlation
  121. r <- cor(Actual,Predicted )
  122. r <- round(r,2)
  123. r
  124.  
  125. # Step 13.2: Accuracy
  126. accuracy <- mean(abs(Actual-Predicted) <=1)
  127. accuracy <- round(accuracy,4) *100
  128. accuracy
  129.  
  130. # Step 13.5: Total Time
  131. totalTime = proc.time()[3] - startTime
  132. totalTime
  133.  
  134. # Step 13.6: Scatter Plot
  135. png(filename=paste(modelName,"-ScatterPlot.png",sep=''))
  136.  
  137. plot(Actual,Predicted,main=paste("Actual Vs Predicted\n",modelName),xlab="Predicted", ylab="Actual")#, pch=19)
  138.  
  139. abline(lm(Actual ~ Predicted,),col="White")
  140.  
  141. dev.off()
  142.  
  143.  
  144.  
  145.  
  146.  
  147.  
  148.  
  149.  
Advertisement
Add Comment
Please, Sign In to add comment