Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#' Train a clustered SVM: k-means partition plus one RBF SVM per cluster
#'
#' The rows of `x` are partitioned with `stats::kmeans()` into `n` clusters.
#' For every non-empty cluster a local model is stored:
#'   * if all responses in the cluster are identical, the single value
#'     `as.numeric(cy[1])` (for a factor `y` this is the integer level code);
#'   * otherwise an `e1071::svm()` with a radial kernel fitted on that
#'     cluster's rows only.
#'
#' @param x Matrix or data frame of predictors (one row per observation).
#' @param y Response vector with one entry per row of `x`; `levels(y)` is
#'   stored in the result.
#' @param n Number of k-means clusters.
#' @param C Cost parameter forwarded to `e1071::svm(cost = )`.
#' @param gamma Kernel parameter forwarded to `e1071::svm(gamma = )`.
#' @return A list of class `"kmeanSVM.learner"` with elements `Model`
#'   (list of per-cluster models), `centers` (matrix of cluster centres, one
#'   row per entry of `Model`) and `levels` (`levels(y)`).
kmeanSVM.train <- function(x, y, n, C, gamma) {
  if (length(y) != nrow(x)) {
    stop("'y' must have one entry per row of 'x'")
  }

  # kmeans() rejects iter.max < 1, which nrow(x) / 10 produced for inputs
  # with fewer than 10 rows; never go below kmeans' own default of 10.
  iter_max <- max(10L, ceiling(nrow(x) / 10))
  km <- stats::kmeans(x, centers = n, iter.max = iter_max)
  centers <- km$centers
  cluster <- km$cluster

  models <- vector("list", n)
  empty <- integer(0)
  for (i in seq_len(n)) {
    ind <- which(cluster == i)
    # No training rows fell into this cluster: remember it for removal.
    if (length(ind) == 0) {
      empty <- c(empty, i)
      next
    }
    cy <- y[ind]
    if (length(unique(cy)) == 1) {
      # Nothing to learn when the cluster holds a single response value.
      models[[i]] <- as.numeric(cy[1])
    } else {
      # drop = FALSE keeps the predictors two-dimensional even when the
      # subset has a single row or column.
      cx <- x[ind, , drop = FALSE]
      models[[i]] <- e1071::svm(x = cx, y = cy, kernel = "radial",
                                cost = C, gamma = gamma)
    }
  }

  # Remove clusters without a model, keeping `centers` a matrix so its rows
  # stay aligned with the entries of `models`.
  if (length(empty) > 0) {
    models <- models[-empty]
    centers <- centers[-empty, , drop = FALSE]
  }

  structure(
    list(Model = models, centers = centers, levels = levels(y)),
    class = "kmeanSVM.learner"
  )
}
#' Predict with a learner produced by `kmeanSVM.train()`
#'
#' Each row of `newdata` is assigned to the nearest stored cluster centre
#' (squared Euclidean distance, ties resolved in favour of the first centre)
#' and predicted by that cluster's model: either the stored constant or the
#' cluster's SVM via `predict()`.
#'
#' @param learner Object of class `"kmeanSVM.learner"`.
#' @param newdata Numeric matrix, or an object `as.matrix()` turns into one,
#'   with the same number of columns as the stored centres.
#' @param ... Further arguments forwarded to `predict()` for SVM models.
#' @return A factor with levels `learner$levels`, one entry per row of
#'   `newdata`. If the learner stores no levels, the numeric predictions are
#'   returned instead.
kmeanSVM.predict <- function(learner, newdata, ...) {
  if (!inherits(learner, "kmeanSVM.learner")) {
    stop("The learner should have class of 'kmeanSVM.learner'")
  }
  models <- learner$Model
  lev <- learner$levels
  n <- length(models)
  if (n == 0) {
    stop("The learner contains no models")
  }

  # as.matrix() always yields a matrix, so the meaningful check is whether
  # the result is numeric.
  newdata <- as.matrix(newdata)
  if (!is.numeric(newdata)) {
    stop(paste0("Input data must be a numeric matrix or an object that can ",
                "be coerced to such a matrix."))
  }

  centers <- learner$centers
  if (is.null(dim(centers))) {
    # Tolerate centres that were collapsed to a plain vector.
    centers <- matrix(centers, nrow = n)
  }
  if (ncol(newdata) != ncol(centers)) {
    stop("'newdata' must have the same number of columns as the centers")
  }

  m <- nrow(newdata)
  codes <- rep(NA_real_, m)
  if (m > 0) {
    # Assign rows to the *trained* centres. Re-running kmeans() on newdata
    # would move the centres and fails when there are few new rows.
    dist2 <- vapply(
      seq_len(n),
      function(k) unname(rowSums(sweep(newdata, 2, centers[k, ])^2)),
      numeric(m)
    )
    dist2 <- matrix(dist2, nrow = m)
    cluster <- max.col(-dist2, ties.method = "first")

    for (i in seq_len(n)) {
      ind <- which(cluster == i)
      if (length(ind) == 0) next
      model <- models[[i]]
      if (inherits(model, "svm")) {
        # Predict on the new rows themselves (not on a global `x`).
        pred <- predict(model, newdata[ind, , drop = FALSE], ...)
        codes[ind] <- if (is.factor(pred)) {
          match(as.character(pred), lev)
        } else {
          as.numeric(pred)
        }
      } else {
        # Single-valued cluster: the stored constant is the prediction.
        codes[ind] <- model
      }
    }
  }

  if (is.null(lev)) {
    return(codes)
  }
  # Index the stored levels by code so labels stay correct even when some
  # class never occurs among the predictions.
  factor(lev[codes], levels = lev)
}
# =========================================
# Test on Breast Cancer data
# =========================================
# library() fails loudly if MASS is missing; require() would only return FALSE.
library(MASS)
data(biopsy)
x <- biopsy[, 2:10]
y <- biopsy[, 11]

# Keep complete rows with a logical mask. Negative indexing with which()
# would delete every row if no incomplete case existed.
complete <- complete.cases(x)
x <- x[complete, ]
y <- y[complete]

# Split data
set.seed(1024)
train.ind <- sample(nrow(x), 300)

# Training session
trained.learner <- kmeanSVM.train(x = x[train.ind, ], y = y[train.ind],
                                  n = 3, C = 1, gamma = 1)

# Predict session
prediction <- kmeanSVM.predict(learner = trained.learner,
                               newdata = x[-train.ind, ])

# Check result
table(prediction, y[-train.ind])

# Compare with single svm. The cost parameter of e1071::svm() is `cost`;
# the name `C` is not one of its arguments.
svm.model <- e1071::svm(x = x[train.ind, ], y = y[train.ind],
                        cost = 1, gamma = 1)
svm.pred <- predict(svm.model, x[-train.ind, ])
table(svm.pred, y[-train.ind])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement