gakonst

Quizz2.R

Jan 22nd, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 1.40 KB | None | 0 0
  # Multisplit Gini function
  #
  # NOTE: this script deliberately does not call `rm(list = ls())`. Every
  # variable used in the code below is assigned before it is read, so an empty
  # workspace is not required, and sourcing the file should not delete objects
  # that the caller already has in the global environment.
  3.  
  #' Weighted Gini impurity of a multiway split
  #'
  #' Each row of `absfreq` is one branch of the split (one level of the
  #' attribute) and each column is one class label. The impurity of a row is
  #' `1 - sum(p^2)`, where `p` are the class proportions within that row. The
  #' total is the sum of the row impurities weighted by each row's share of
  #' all observations.
  #'
  #' All class columns are used, so the function is not limited to tables
  #' whose columns are named "No" and "Yes"; for such two-class tables the
  #' result values are the same as a direct `1 - p_no^2 - p_yes^2` computation.
  #'
  #' @param absfreq Two-dimensional table or matrix of absolute frequencies
  #'   (rows = attribute levels, columns = classes).
  #' @return Numeric vector of length `nrow(absfreq) + 1`. The first element,
  #'   named "total", is the weighted Gini of the split; the remaining
  #'   elements are the per-row Gini values, named by the row names of
  #'   `absfreq` when it has any. A row with no observations has an undefined
  #'   (`NaN`) impurity and contributes nothing to the total. An input with
  #'   zero rows yields `c(total = 0)`.
  gini_process <- function(absfreq) {
    stopifnot(length(dim(absfreq)) == 2L)
    if (nrow(absfreq) == 0L) {
      return(c(total = 0))
    }

    # Class proportions within each row, and each row's share of all counts.
    class_share <- prop.table(absfreq, 1)
    row_weight <- rowSums(prop.table(absfreq))

    ginis <- 1 - rowSums(class_share^2)

    # Empty rows have weight 0 but impurity NaN; 0 * NaN would poison the sum,
    # so only rows that actually contain observations enter the total.
    populated <- which(row_weight > 0)
    total <- sum(row_weight[populated] * ginis[populated])

    c(total = total, ginis)
  }
  20.  
  21.  
  # Load the car data set; the path is resolved against the working directory.
  car_data <- read.csv("../car_data.csv")

  # Every section below follows the same recipe: cross-tabulate one attribute
  # column against column 5, hand the table to gini_process(), then split the
  # result into its first element (the weighted total) and the remaining
  # elements (one value per table row).

  # Customers: column 1 against column 5
  customer_ids <- table(car_data[, c(1, 5)])
  gini_data <- gini_process(customer_ids)
  CUSTOMERID_GINI_TOTAL <- gini_data[1]
  CUSTOMERID_GINI_PER_CLASS <- gini_data[-1]  # everything except the total

  # Sex: column 2 against column 5
  sex <- table(car_data[, c(2, 5)])
  gini_data <- gini_process(sex)
  SEX_GINI_TOTAL <- gini_data[1]
  SEX_GINI_PER_CLASS <- gini_data[-1]  # everything except the total

  # Car types: column 3 against column 5
  car_types <- table(car_data[, c(3, 5)])
  gini_data <- gini_process(car_types)
  CARTYPES_GINI_TOTAL <- gini_data[1]
  CARTYPES_GINI_PER_CLASS <- gini_data[-1]  # everything except the total

  # Budget: column 4 against column 5
  budget <- table(car_data[, c(4, 5)])
  gini_data <- gini_process(budget)
  BUDGET_GINI_TOTAL <- gini_data[1]
  BUDGET_GINI_PER_CLASS <- gini_data[-1]  # everything except the total
Advertisement
Add Comment
Please, Sign In to add comment