Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(Rcpp)
- library(inline)
- # Build an inline plugin named "daalNB" for compiling the C++ snippets below.
- # The plugin puts the DAAL header ahead of the generated code and passes the
- # DAAL, TBB, pthread and math libraries to the linker. "$DAALROOT" is kept
- # literally in the flag string, so it is resolved outside of R
- # (NOTE(review): presumably by the build shell -- confirm DAALROOT is set).
- plug <- Rcpp:::Rcpp.plugin.maker(
-   include.before = "#include <daal.h> ",
-   libs = paste0(
-     "-L$DAALROOT/lib/ -ldaal_core -ldaal_thread ",
-     "-ltbb -lpthread -lm"
-   )
- )
- registerPlugin("daalNB", plug)
- # readCSV: C++ body for loadData (compiled with cxxfunction further down).
- # Reads a CSV file through a DAAL FileDataSource into a merged table made of
- # a double-valued table with ncols - 1 columns and a one-column integer table,
- # then serializes both tables.
- # Inputs (from the cxxfunction signature): file (character), ncols (integer).
- # Returns: a list with raw vectors "data" and "labels" holding the serialized
- # tables.
- readCSV <- '
- using namespace daal;
- using namespace daal::data_management;
- // Inputs:
- // file - file name
- // ncols - number of columns in file
- std::string fname = Rcpp::as<std::string>(file);
- int k = Rcpp::as<int>(ncols);
- // k - 1 is used as the column count of the data table, so at least one data
- // column plus the label column is required.
- if (k < 2) Rcpp::stop("ncols must be at least 2");
- // Data source
- FileDataSource<CSVFeatureManager> dataSource(fname, DataSource::notAllocateNumericTable, DataSource::doDictionaryFromContext);
- // DAAL NumericTables for data and labels
- services::SharedPtr<NumericTable> data(
- new HomogenNumericTable<double>(k-1, 0, NumericTable::notAllocate));
- services::SharedPtr<NumericTable> labels(
- new HomogenNumericTable<int>(1, 0, NumericTable::notAllocate));
- services::SharedPtr<NumericTable> merged(new MergedNumericTable(data, labels));
- // Load data
- dataSource.loadDataBlock(merged.get());
- // Serialize NumericTables
- InputDataArchive dataArch, labelsArch;
- data->serialize(dataArch);
- labels->serialize(labelsArch);
- Rcpp::RawVector dataBytes(dataArch.getSizeOfArchive());
- dataArch.copyArchiveToArray(&dataBytes[0], dataArch.getSizeOfArchive());
- Rcpp::RawVector labelsBytes(labelsArch.getSizeOfArchive());
- labelsArch.copyArchiveToArray(&labelsBytes[0], labelsArch.getSizeOfArchive());
- // Return a named list of RawVectors
- return Rcpp::List::create(
- Rcpp::Named("data") = dataBytes,
- Rcpp::Named("labels") = labelsBytes);'
- # Naive Bayes: train
- # train: C++ body for nbTrain (compiled with cxxfunction further down).
- # Rebuilds a HomogenNumericTable<double> from the raw vector X and a
- # HomogenNumericTable<int> from the raw vector y, runs the batch training
- # algorithm from daal::algorithms::multinomial_naive_bayes with nclasses
- # classes, and serializes the resulting model.
- # Inputs (from the cxxfunction signature): X (raw), y (raw), nclasses (integer).
- # Returns: a raw vector holding the serialized model.
- # NOTE(review): X and y are presumably the "data" and "labels" elements
- # produced by loadData -- confirm against the caller.
- train <- '
- using namespace daal;
- using namespace daal::algorithms;
- using namespace daal::algorithms::multinomial_naive_bayes;
- using namespace daal::data_management;
- // Inputs:
- // X - training dataset
- // y - training data groundtruth
- // nclasses - number of classes
- Rcpp::RawVector Xr(X);
- Rcpp::RawVector yr(y);
- int nClasses = Rcpp::as<int>(nclasses);
- // Deserialize data and labels
- OutputDataArchive dataArch(&Xr[0], Xr.length());
- services::SharedPtr<NumericTable> ntData(new HomogenNumericTable<double>());
- ntData->deserialize(dataArch);
- OutputDataArchive labelsArch(&yr[0], yr.length());
- services::SharedPtr<NumericTable> ntLabels(new HomogenNumericTable<int>());
- ntLabels->deserialize(labelsArch);
- // Train a model
- training::Batch<> algorithm(nClasses);
- algorithm.input.set(classifier::training::data, ntData);
- algorithm.input.set(classifier::training::labels, ntLabels);
- algorithm.compute();
- // Get result
- services::SharedPtr<training::Result> result = algorithm.getResult();
- InputDataArchive archive;
- result->get(classifier::training::model)->serialize(archive);
- Rcpp::RawVector out(archive.getSizeOfArchive());
- archive.copyArchiveToArray(&out[0], archive.getSizeOfArchive());
- return out;'
- # Naive Bayes: predict
- # predict: C++ body for nbPredict (compiled with cxxfunction further down).
- # Rebuilds a multinomial_naive_bayes::Model from the raw vector model and a
- # HomogenNumericTable<double> from the raw vector X, runs the batch prediction
- # algorithm with nclasses classes, and copies the predicted labels out of the
- # result table.
- # Inputs (from the cxxfunction signature): model (raw), X (raw),
- # nclasses (integer).
- # Returns: an integer vector with one predicted label per row of the result
- # table.
- predict <- '
- using namespace daal;
- using namespace daal::algorithms;
- using namespace daal::algorithms::multinomial_naive_bayes;
- using namespace daal::data_management;
- // Inputs:
- // model - a trained model
- // X - input data
- // nclasses - number of classes
- Rcpp::RawVector modelBytes(model);
- Rcpp::RawVector dataBytes(X);
- int nClasses = Rcpp::as<int>(nclasses);
- // Retrieve model
- OutputDataArchive modelArch(&modelBytes[0], modelBytes.length());
- services::SharedPtr<multinomial_naive_bayes::Model> nb(
- new multinomial_naive_bayes::Model());
- nb->deserialize(modelArch);
- // Deserialize data
- OutputDataArchive dataArch(&dataBytes[0], dataBytes.length());
- services::SharedPtr<NumericTable> ntData(new HomogenNumericTable<double>());
- ntData->deserialize(dataArch);
- // Predict for new data
- prediction::Batch<> algorithm(nClasses);
- algorithm.input.set(classifier::prediction::data, ntData);
- algorithm.input.set(classifier::prediction::model, nb);
- algorithm.compute();
- // Read all rows of the prediction table as int
- services::SharedPtr<NumericTable> predictionResult =
- algorithm.getResult()->get(classifier::prediction::prediction);
- BlockDescriptor<int> block;
- int n = predictionResult->getNumberOfRows();
- predictionResult->getBlockOfRows(0, n, readOnly, block);
- int* newlabels = block.getBlockPtr();
- // Copy into an R-owned vector before the block is handed back
- Rcpp::IntegerVector predictedLabels(n);
- std::copy(newlabels, newlabels+n, predictedLabels.begin());
- // Pair getBlockOfRows with its release now that the labels are copied out
- predictionResult->releaseBlockOfRows(block);
- // Return newlabels
- return predictedLabels;'
- # Compile each C++ body into an R-callable function with the "daalNB" plugin.
- # loadData(file, ncols): compiled from readCSV.
- loadData <- cxxfunction(
-   sig = signature(file = "character", ncols = "integer"),
-   body = readCSV,
-   plugin = "daalNB"
- )
- # nbTrain(X, y, nclasses): compiled from train.
- nbTrain <- cxxfunction(
-   sig = signature(X = "raw", y = "raw", nclasses = "integer"),
-   body = train,
-   plugin = "daalNB"
- )
- # nbPredict(model, X, nclasses): compiled from predict.
- nbPredict <- cxxfunction(
-   sig = signature(model = "raw", X = "raw", nclasses = "integer"),
-   body = predict,
-   plugin = "daalNB"
- )
Add Comment
Please, Sign In to add comment