Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Multisplit Gini function
#
# Computes the Gini impurity of a multiway split from a contingency table.
#
# Args:
#   absfreq: two-dimensional table (or matrix) of absolute counts with one
#            row per split branch (attribute value) and one column per class
#            label. Any number of class columns is accepted.
#
# Returns:
#   Numeric vector of length nrow(absfreq) + 1. The first element ("total")
#   is the Gini of the whole split, i.e. the per-row impurities weighted by
#   each row's share of all observations. The remaining elements are the
#   per-row impurities 1 - sum(p^2), named after the rows of `absfreq`.
#   A row without observations has an undefined (NaN) impurity and does not
#   contribute to the total; the total is NaN when the table holds no
#   observations at all.
gini_process <- function(absfreq) {
  stopifnot(length(dim(absfreq)) == 2L)

  counts <- as.matrix(absfreq)
  row_totals <- rowSums(counts)

  # The row totals are recycled down the columns, so every cell is divided
  # by the total of its own row (class proportions within each branch).
  class_share <- counts / row_totals
  row_gini <- 1 - rowSums(class_share^2)

  # Empty rows yield 0 / 0 = NaN above; keep them out of the weighted sum so
  # that a single unused level does not turn the overall result into NaN.
  populated <- row_totals > 0
  total_gini <- if (any(populated)) {
    sum(row_totals[populated] / sum(row_totals) * row_gini[populated])
  } else {
    NaN
  }

  c(total = total_gini, row_gini)
}
# Load the data set; the path is resolved relative to the working directory.
car_data <- read.csv("../car_data.csv")

# Every section below cross-tabulates one attribute column against column 5
# and passes the counts to gini_process(). The first element of the result is
# stored as the *_TOTAL value, all remaining elements as *_PER_CLASS.
# NOTE(review): column 5 is presumably the Yes/No class label - confirm
# against the CSV layout.

# Customers (column 1)
customer_ids <- table(car_data[c(1, 5)])
gini_data <- gini_process(customer_ids)
CUSTOMERID_GINI_TOTAL <- gini_data[1]
CUSTOMERID_GINI_PER_CLASS <- gini_data[-1]  # everything but the first entry

# Sex (column 2)
sex <- table(car_data[c(2, 5)])
gini_data <- gini_process(sex)
SEX_GINI_TOTAL <- gini_data[1]
SEX_GINI_PER_CLASS <- gini_data[-1]  # everything but the first entry

# Cars (column 3)
car_types <- table(car_data[c(3, 5)])
gini_data <- gini_process(car_types)
CARTYPES_GINI_TOTAL <- gini_data[1]
CARTYPES_GINI_PER_CLASS <- gini_data[-1]  # everything but the first entry

# Budget (column 4)
budget <- table(car_data[c(4, 5)])
gini_data <- gini_process(budget)
BUDGET_GINI_TOTAL <- gini_data[1]
BUDGET_GINI_PER_CLASS <- gini_data[-1]  # everything but the first entry
Advertisement
Add Comment
Please, Sign In to add comment