# Untitled: k-means-partitioned SVM (train / predict) with a demo on MASS::biopsy

# Train a clustered SVM: partition the rows of x with k-means, then fit one
# radial-kernel SVM on the observations of each cluster.
#
# Arguments:
#   x      matrix or data frame of predictors, one observation per row.
#   y      response with one entry per row of x (a factor for classification).
#   n      number of k-means clusters to request.
#   C      cost parameter passed to e1071::svm.
#   gamma  kernel parameter passed to e1071::svm.
#
# Returns a list of class 'kmeanSVM.learner' with elements
#   Model    one entry per non-empty cluster: an e1071 svm fit, or, when all
#            responses in the cluster are identical, as.numeric() of that
#            response (the integer level code when y is a factor).
#   centers  matrix of cluster centers; row i belongs to Model[[i]].
#   levels   levels(y).
kmeanSVM.train = function(x,y,n,C,gamma)
{
    if (NROW(x) != length(y))
        stop("'x' and 'y' must contain the same number of observations")

    # Cluster data points. The iteration budget grows with the data size but
    # never drops below the kmeans default of 10: nrow(x)/10 on its own
    # truncates to 0 for fewer than 10 rows, which kmeans rejects.
    iter.max = max(10L, as.integer(nrow(x)/10))
    kmeans.result = stats::kmeans(x, centers = n, iter.max = iter.max)
    centers = kmeans.result$centers
    cluster = kmeans.result$cluster

    Model = vector(n,mode = 'list')
    empty = logical(n)
    for (i in seq_len(n))
    {
        ind = which(cluster==i)

        # If there's no data falling in this region, mark it for removal
        if (length(ind)==0)
        {
            empty[i] = TRUE
            next
        }

        # Split x and y; drop = FALSE keeps cx two-dimensional even for a
        # single row or a single predictor column
        cx = x[ind, , drop = FALSE]
        cy = y[ind]

        # no need to train when cy only has one value
        if (length(unique(cy))==1)
            Model[[i]] = as.numeric(cy[1])
        else
        {
            Model[[i]] = e1071::svm(x = cx, y = cy, kernel = "radial",
                               cost = C, gamma = gamma)
        }
    }

    # Delete useless centers; drop = FALSE keeps 'centers' a matrix even when
    # only one cluster survives
    if (any(empty))
    {
        Model = Model[!empty]
        centers = centers[!empty, , drop = FALSE]
    }
    kmeanSVM.learner = list(Model = Model,
                            centers = centers,
                            levels = levels(y))
    return(structure(kmeanSVM.learner,class='kmeanSVM.learner'))
}

# Predict with a 'kmeanSVM.learner': every row of newdata is assigned to its
# nearest stored cluster center and predicted by that cluster's model.
#
# Arguments:
#   learner  object of class 'kmeanSVM.learner' (list with Model, centers,
#            levels).
#   newdata  numeric matrix, or an object as.matrix() turns into one, with
#            one observation per row and one column per column of
#            learner$centers.
#   ...      forwarded to predict() for clusters that hold an svm model.
#
# Returns a factor with levels learner$levels, one entry per row of newdata.
# When learner$levels is NULL the raw numeric predictions are returned.
kmeanSVM.predict = function(learner, newdata, ...)
{
    if (!inherits(learner, 'kmeanSVM.learner'))
        stop("The learner should have class of 'kmeanSVM.learner'")
    Model = learner$Model
    n = length(Model)

    # A learner whose centers were collapsed to a plain vector is restored
    # to one row per model.
    centers = learner$centers
    if (!is.matrix(centers))
        centers = matrix(centers, nrow = n)

    # check data type
    newdata = as.matrix(newdata)
    if (!is.numeric(newdata))
        stop('Input data must be a numeric matrix or an object that can be coerced to such a matrix.')
    if (ncol(newdata) != ncol(centers))
        stop('newdata must have the same number of columns as the cluster centers.')

    m = nrow(newdata)
    y = rep(0,m)
    if (m > 0)
    {
        # Get cluster label for new data: nearest center by squared Euclidean
        # distance. Re-running kmeans here would move the centers and fails
        # when a cluster receives no new point.
        dist2 = matrix(0, nrow = m, ncol = n)
        for (i in seq_len(n))
            dist2[,i] = rowSums(sweep(newdata, 2, centers[i,], '-')^2)
        cluster = max.col(-dist2, ties.method = 'first')

        for (i in seq_len(n))
        {
            ind = which(cluster==i)
            if (length(ind)==0) next
            if (!inherits(Model[[i]], 'svm'))
                y[ind] = Model[[i]]
            else
            {
                # Rows come from newdata (not from any global object);
                # as.numeric() turns factor predictions into level codes
                cx = newdata[ind, , drop = FALSE]
                y[ind] = as.numeric(predict(Model[[i]],cx,...))
            }
        }
    }

    if (is.null(learner$levels))
        return(y)

    # Map codes to labels explicitly so that a class missing from the
    # predictions cannot shift the remaining labels.
    y = factor(y, levels = seq_along(learner$levels), labels = learner$levels)
    return(y)
}

# =========================================
# Test on Breast Cancer data
# =========================================

# library() stops immediately when MASS is missing; require() would only
# return FALSE and let data(biopsy) fail later.
library(MASS)
data(biopsy)
x = biopsy[,2:10]
y = biopsy[,11]

# Drop incomplete rows. A logical mask is used because negative indexing
# with an empty index vector would remove every row.
complete = complete.cases(x)
x = x[complete,]
y = y[complete]

# Split data
set.seed(1024)
train.ind = sample(nrow(x),300)

# Training session
trained.learner = kmeanSVM.train(x = x[train.ind,], y = y[train.ind],
                                 n = 3, C = 1, gamma = 1)
# Predict session
prediction = kmeanSVM.predict(learner = trained.learner,
                              newdata = x[-train.ind,])
# Check result
table(prediction,y[-train.ind])

# Compare with single svm ('cost' is the e1071 name of the C parameter)
svm.model = e1071::svm(x = x[train.ind,], y = y[train.ind], cost = 1, gamma = 1)
svm.pred = predict(svm.model, x[-train.ind,])
table(svm.pred,y[-train.ind])