Advertisement
gakonst

Untitled

Jan 23rd, 2018
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.35 KB | None | 0 0
  # Gini-index analysis of the car data set.
  #
  # For each candidate split attribute, a contingency table of attribute
  # level (rows) against the class in column 5 (columns) is built, and the
  # weighted Gini index of that split is derived from the table.
  #
  # The workspace is deliberately not cleared here: wiping the caller's
  # global environment is a side effect a script should not have.

  # Gini impurity of every row of a two-way contingency table.
  #
  # counts: table with one row per attribute level and one column per class.
  # Returns a named numeric vector holding, for each row, one minus the sum
  # of the squared class proportions within that row.
  gini_by_level <- function(counts) {
    1 - rowSums(prop.table(counts, 1)^2)
  }

  # Weighted Gini index of the split described by a contingency table.
  #
  # counts: table with one row per attribute level and one column per class.
  # Returns a single number: each row's impurity weighted by the share of
  # all observations that fall into that row. Rows without observations are
  # skipped so that their undefined impurity cannot turn the result into NaN.
  weighted_gini <- function(counts) {
    level_totals <- rowSums(counts)
    occupied <- level_totals > 0
    shares <- level_totals[occupied] / sum(level_totals)
    sum(shares * gini_by_level(counts)[occupied])
  }

  # Collapse several levels of a categorical vector into one label.
  #
  # values: factor or character vector.
  # merged: character vector of the levels to collapse.
  # label:  the single label that replaces them.
  # Returns a character vector of the same length as `values`.
  merge_levels <- function(values, merged, label) {
    values <- as.character(values)
    # %in% never yields NA, so missing values are left untouched.
    values[values %in% merged] <- label
    values
  }

  # Read data from disk
  car_data <- read.csv("../car_data.csv")

  # ---- Customer ID (column 1) ----
  id_counts <- table(car_data[, c(1, 5)])
  # One impurity per distinct customer ID; the number of IDs is taken from
  # the data instead of being fixed at 20.
  GINI_CUSTOMERS <- unname(gini_by_level(id_counts))
  GINI_ID <- weighted_gini(id_counts)

  # ---- Sex (column 2) ----
  sex_counts <- table(car_data[, c(2, 5)])
  sex_gini <- gini_by_level(sex_counts)
  GINI_Male <- sex_gini[["M"]]
  GINI_Female <- sex_gini[["F"]]
  GINI_Sex <- weighted_gini(sex_counts)

  # ---- CarType (column 3): binary groupings of Sedan / Family / Sport ----

  # Split A: Family + Sedan vs Sport
  splita <- car_data[, c(3, 5)]
  splita$CarType <- merge_levels(
    splita$CarType, c("Sedan", "Family"), "FamilySedan"
  )
  counts_a <- table(splita)
  gini_a <- gini_by_level(counts_a)
  GINI_FamilySedan <- gini_a[["FamilySedan"]]
  GINI_Sport <- gini_a[["Sport"]]
  GINI_SplitA <- weighted_gini(counts_a)

  # Split B: Family + Sport vs Sedan
  splitb <- car_data[, c(3, 5)]
  splitb$CarType <- merge_levels(
    splitb$CarType, c("Family", "Sport"), "FamilySport"
  )
  counts_b <- table(splitb)
  gini_b <- gini_by_level(counts_b)
  GINI_FamilySport <- gini_b[["FamilySport"]]
  GINI_Sedan <- gini_b[["Sedan"]]
  GINI_SplitB <- weighted_gini(counts_b)

  # Split C: Sport + Sedan vs Family
  splitc <- car_data[, c(3, 5)]
  splitc$CarType <- merge_levels(
    splitc$CarType, c("Sport", "Sedan"), "SportSedan"
  )
  counts_c <- table(splitc)
  gini_c <- gini_by_level(counts_c)
  GINI_SportSedan <- gini_c[["SportSedan"]]
  GINI_Family <- gini_c[["Family"]]
  GINI_SplitC <- weighted_gini(counts_c)

  # ---- Budget ----
  # NOTE(review): the original script repeated the Family-Sport vs Sedan
  # computation verbatim under this heading, so no Budget split was ever
  # evaluated. Presumably column 4 holds the budget attribute -- confirm,
  # then add its split here, e.g. weighted_gini(table(car_data[, c(4, 5)])).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement