Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <RcppArmadillo.h>
- #include <algorithm>
- #include <numeric>
- using namespace Rcpp;
- using namespace arma;
- // [[Rcpp::depends(RcppArmadillo)]]
- // [[Rcpp::export]]
- arma::uvec intersection(const arma::uvec& x_,const arma::uvec& y_) {
- std::vector<double> x(x_.begin(),x_.end()),y(y_.begin(),y_.end());
- std::sort(x.begin(),x.end());std::sort(y.begin(),y.end());
- std::vector<double> temp;
- std::set_intersection(x.begin(),x.end(),y.begin(),y.end(),std::back_inserter(temp));
- arma::uvec out=arma::conv_to<arma::uvec>::from(temp);
- return out;
- }
- // [[Rcpp::export]]
- arma::vec groupSum(const arma::vec& x_,const arma::vec& group_) {
- uvec index=arma::sort_index(group_);
- vec x=x_.elem(index);
- vec group=group_.elem(index);
- std::vector<double> temp;
- int row=0;
- temp.push_back(0);
- for (int i=0;i<x.size();i++) {
- temp[row]+=x[i];
- if (group[i]!=group[i+1] && i!=(x.size()-1)) {
- row++;
- temp.push_back(0);
- }
- }
- return arma::conv_to<arma::vec>::from(temp);
- }
- // [[Rcpp::export]]
- DataFrame tsTotal1(const NumericVector& value_,const NumericVector& weight_,
- const NumericVector& strata_,const NumericVector& cluster_) {
- // calculates totals, standard errors for totals, percent, standard error for
- // percent for one way contigency table
- // make arma vecs
- vec value(value_), weight(weight_), strata(strata_), cluster(cluster_);
- // unique levels and strata
- vec levels=unique(value);
- vec strats=unique(strata);
- // allocate memory for output
- vec totals(levels.size());
- vec variances(levels.size());
- vec percent(levels.size());
- vec percentSE(levels.size());
- vec outVal(levels.size());
- Rcpp::Rcout << "# of Strata: " << strats.size() << std::endl;
- double total=sum(weight);
- Rcpp::Rcout << "Sum of weights: " << total << std::endl;
- int totalclusters=0;
- // allocate memorey for objects in loop
- int clustnum;
- double variance,pvar,cn;
- vec clustsum, subweights, pclustsum;
- // for each level
- for (int i=0;i<levels.size();i++) {
- // total summed weight
- uvec valdelta=arma::find(value==levels[i]);
- totals[i]=sum(weight.elem(valdelta));
- percent[i]=totals[i]/total;
- outVal[i]=levels[i];
- // init variance sum
- variance=0;pvar=0;
- // for each strata
- for (int j=0;j<strats.size();j++) {
- // clusters nested in strata
- uvec stratdelta=arma::find(strata==strats[j]);
- vec subclust=unique(cluster.elem(stratdelta)); //shouldn't define vec in loop
- clustnum=subclust.size();
- if (clustnum==1) {
- continue;
- }
- totalclusters+=clustnum;
- cn=clustnum;
- clustsum.set_size(clustnum);
- pclustsum.set_size(clustnum);
- subweights.set_size(stratdelta.size());
- subweights=weight.elem(stratdelta);
- subweights.elem(arma::find(value.elem(stratdelta)!=levels[i])).zeros();
- clustsum=groupSum(subweights,cluster.elem(stratdelta));
- pclustsum=(clustsum-percent[i]*groupSum(weight.elem(stratdelta),cluster.elem(stratdelta)))/total;
- // add variance of strata j
- variance+=(cn/(cn-1))*sum(pow(clustsum-mean(clustsum),2));
- pvar+=(cn/(cn-1))*sum(pow(pclustsum-mean(pclustsum),2));
- }
- // take sqrt and append to output vec
- variances[i]=sqrt(variance);
- percentSE[i]=sqrt(pvar);
- }
- Rcpp::Rcout << "# of Clusters: " << totalclusters/levels.size() << std::endl;
- return DataFrame::create(_["values"]=outVal,
- _["totals"]=totals,
- _["SE"]=variances,
- _["percent"]=percent*100,
- _["percentSE"]=percentSE*100);
- }
- // [[Rcpp::export]]
- DataFrame tsTotal2(const NumericVector& value_,const NumericVector& value2_, const NumericVector& weight_,
- const NumericVector& strata_,const NumericVector& cluster_) {
- // calculates totals, standard errors for totals, percent, standard error for percent
- // column and row percent and their standard errors for a two way contigency table
- // make arma vecs
- vec value(value_), value2(value2_),weight(weight_), strata(strata_), cluster(cluster_);
- // unique levels and strata
- vec levels=unique(value);
- vec levels2=unique(value2);
- vec strats=unique(strata);
- // allocate memory for output
- vec totals(levels.size()*levels2.size());
- vec variances(levels.size()*levels2.size());
- vec percent(levels.size()*levels2.size());
- vec percentSE(levels.size()*levels2.size());
- vec rpercent(levels.size()*levels2.size());
- vec rpercentSE(levels.size()*levels2.size());
- vec cpercent(levels.size()*levels2.size());
- vec cpercentSE(levels.size()*levels2.size());
- vec outVal(levels.size()*levels2.size());
- vec outVal2(levels.size()*levels2.size());
- Rcpp::Rcout << "# of Strata: " << strats.size() << std::endl;
- double total=sum(weight);
- Rcpp::Rcout << "Sum of weights: " << total << std::endl;
- int totalclusters=0;
- // allocate memorey for objects in loop
- int clustnum, count;
- double variance,pvar,rpvar,cpvar,cn,rtotal,ctotal;
- vec clustsum, subweights, pclustsum, rpclustsum,cpclustsum, rdelta, cdelta;
- // for each level
- count=0;
- for (int i=0;i<levels.size();i++) {
- for (int k=0;k<levels2.size();k++) {
- // total summed weight
- uvec valdelta1=arma::find(value==levels[i]);
- uvec valdelta2=arma::find(value2==levels2[k]);
- totals[count]=sum(weight.elem(intersection(valdelta1,valdelta2)));
- percent[count]=totals[count]/total;
- rtotal=sum(weight.elem(valdelta1));
- ctotal=sum(weight.elem(valdelta2));
- rpercent[count]=totals[count]/rtotal;
- cpercent[count]=totals[count]/ctotal;
- outVal[count]=levels[i];
- outVal2[count]=levels2[k];
- // init variance sum
- variance=0;pvar=0;rpvar=0;cpvar=0;
- // for each strata
- for (int j=0;j<strats.size();j++) {
- // clusters nested in strata
- uvec stratdelta=arma::find(strata==strats[j]);
- vec subclust=unique(cluster.elem(stratdelta)); //shouldn't define vec in loop
- clustnum=subclust.size();
- if (clustnum==1) {
- continue;
- }
- totalclusters+=clustnum;
- cn=clustnum;
- clustsum.set_size(clustnum);
- pclustsum.set_size(clustnum);
- subweights.set_size(stratdelta.size());
- rdelta.set_size(stratdelta.size());
- cdelta.set_size(stratdelta.size());
- subweights=weight.elem(stratdelta);
- subweights.elem(arma::find(value.elem(stratdelta)!=levels[i])).zeros();
- subweights.elem(arma::find(value2.elem(stratdelta)!=levels2[k])).zeros();
- clustsum=groupSum(subweights,cluster.elem(stratdelta));
- pclustsum=(clustsum-percent[count]*groupSum(weight.elem(stratdelta),cluster.elem(stratdelta)))/total;
- rdelta=weight.elem(stratdelta);
- rdelta.elem(arma::find(value.elem(stratdelta)!=levels[i])).zeros();
- cdelta=weight.elem(stratdelta);
- cdelta.elem(arma::find(value2.elem(stratdelta)!=levels2[k])).zeros();
- rpclustsum=(clustsum-rpercent[count]*groupSum(rdelta,cluster.elem(stratdelta)))/rtotal;
- cpclustsum=(clustsum-cpercent[count]*groupSum(cdelta,cluster.elem(stratdelta)))/ctotal;
- // add variance of strata j
- variance+=(cn/(cn-1))*sum(pow(clustsum-mean(clustsum),2));
- pvar+=(cn/(cn-1))*sum(pow(pclustsum-mean(pclustsum),2));
- rpvar+=(cn/(cn-1))*sum(pow(rpclustsum-mean(rpclustsum),2));
- cpvar+=(cn/(cn-1))*sum(pow(cpclustsum-mean(cpclustsum),2));
- }
- // take sqrt and append to output vec
- variances[count]=sqrt(variance);
- percentSE[count]=sqrt(pvar);
- rpercentSE[count]=sqrt(rpvar);
- cpercentSE[count]=sqrt(cpvar);
- count+=1;
- }
- }
- Rcpp::Rcout << "# of Clusters: " << totalclusters/(levels.size()*levels2.size()) << std::endl;
- return DataFrame::create(_["values"]=outVal,
- _["byValues"]=outVal2,
- _["totals"]=totals,
- _["SE"]=variances,
- _["percent"]=percent*100,
- _["percentSE"]=percentSE*100,
- _["colPercent"]=cpercent*100,
- _["colPercentSE"]=cpercentSE*100,
- _["rowPercent"]=rpercent*100,
- _["rowPercentSE"]=rpercentSE*100);
- }
Add Comment
Please, Sign In to add comment