draxus

RobustPCA.java

Feb 19th, 2014
254
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import java.awt.FileDialog;
import java.awt.Frame;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.rosuda.JRI.REXP;
import org.rosuda.JRI.RMainLoopCallbacks;
import org.rosuda.JRI.Rengine;
  4.  
  5. /**
  6.  * Wrapper for Robust PCA R method
  7.  *
  8.  * Documentation: http://cran.r-project.org/web/packages/rrcov/index.html
  9.  *
  10.  * In order to work in Windows, the next environmental variables have to be set:
  11.  * PATH          = %YOUR_PATH_TO_R%\R-3.0.2\library\rJava\jri\x64;%YOUR_PATH_TO_R%\R-3.0.2\bin\x64
  12.  * R_DOC_DIR     = %YOUR_PATH_TO_R%\R-3.0.2\doc
  13.  * R_HOME        = %YOUR_PATH_TO_R%\R-3.0.2
  14.  * R_INCLUDE_DIR = %YOUR_PATH_TO_R%\R-3.0.2\include
  15.  * R_SHARE_DIR   = %YOUR_PATH_TO_R%\R-3.0.2\share
  16.  *
  17.  *
  18.  *
  19.  * Beware with Windows environmental variables extension: it's alphabetical!!
  20.  * For example, you can't use %R_HOME% in PATH variable.
  21.  * http://bit.ly/IK2EW2
  22.  *
  23.  * JRI library is provided by rJava package
  24.  * install.packages('rJava', repos='http://cran.ma.imperial.ac.uk/')
  25.  *
  26.  * rrcov package has to be installed in the R library
  27.  * install.packages('rrcov', repos='http://cran.ma.imperial.ac.uk/')
  28.  *
  29.  * @author Manuel Martin <draxus@gmail.com>
  30.  *
  31.  */
  32. public class RobustPCA {
  33.     private Rengine re = null; // initialized in constructor or autowired
  34.     private int NUMBER_OF_COMPONENTS;
  35.    
  36.     public RobustPCA(){
  37.         NUMBER_OF_COMPONENTS = 0;
  38.     }
  39.    
  40.     public RobustPCA(int components) throws Exception{
  41.         if (components<0){
  42.             throw new Exception(this, "The number of components has to be positive");
  43.         }
  44.         NUMBER_OF_COMPONENTS = components;
  45.     }
  46.    
  47.     private void runREngine(){
  48.         if (!Rengine.versionCheck()) {
  49.             System.err.println("** Version mismatch - Java files don't match library version.");
  50.             System.exit(1);
  51.         }
  52.        
  53.         System.out.println("Creating Rengine");
  54.         re = new Rengine(null, false, new TextConsole());
  55.         System.out.println("Rengine created, waiting for R");
  56.         if (!re.waitForR()) {
  57.             System.out.println("Cannot load R");
  58.         }
  59.     }
  60.    
  61.     protected void finalize() throws Throwable{
  62.         //close R engine
  63.         if (re!=null){
  64.             re.eval("q()");
  65.             re.end();
  66.             re = null;
  67.         }
  68.     }
  69.    
  70.     public double[][] process(double[][] input) throws Exception{
  71.         // Assuming that input is a matrix of size nxp
  72.         if(input.length < input[0].length){
  73.             throw new Exception(this, "The number of instances must be greater than the number of attributes to perform Robust PCA");
  74.         }
  75.        
  76.         if(re == null){
  77.             runREngine();
  78.         }
  79.         else{ //restart REngine for re-training
  80.             re.eval("q()");
  81.             re.end();
  82.             re = null;
  83.             runREngine();
  84.         }
  85.        
  86.         // Create the matrix in R (except the predicted variables)
  87.         re.eval("m <- " + matrixToR(input));
  88.  
  89.         // Load library rrcov
  90.         REXP loaded = re.eval("library(rrcov)",false);
  91.         if(loaded==null){
  92.             throw new Exception(this, "The library rrcov has not been found");
  93.         }
  94.        
  95.         // Remove missing values
  96.         re.eval("m <- na.omit(m)");
  97.        
  98.         // Robust PCA
  99.         REXP pca = re.eval("pca <- PcaCov(m, k="+NUMBER_OF_COMPONENTS+")"); //don't need to store in Java because REngine is the same
  100.         if(pca==null){
  101.             throw new Exception(this, "There was an error computing PcaCov");
  102.         }
  103.        
  104.         REXP x = re.eval("scores <- scale(m, center=pca@center, scale=FALSE) %*% pca@loadings");
  105.         double newData[][] = x.asDoubleMatrix();
  106.        
  107.         // Clean workspace
  108.         re.eval("rm(m)");
  109.  
  110.         return newData;
  111.     }
  112.    
  113.     private String matrixToR(double[][] matrix){
  114.        
  115.         String matrixString = "";
  116.         for(int i=0; i<matrix.length; i++){
  117.             for(int j=0; j<matrix[i].length; j++){
  118.                 if (Double.isNaN((Double) matrix[i][j])){ //missing value
  119.                     matrixString += "NA,";
  120.                 }
  121.                 else{
  122.                     matrixString += String.valueOf(matrix.[i][j]) + ",";
  123.                 }
  124.                
  125.             }
  126.         }
  127.         matrixString = matrixString.substring(0, matrixString.length()-1); //remove the last comma
  128.        
  129.         // Matrix creation in R
  130.         return "matrix(c("+matrixString+"), nrow="+matrix.length+", ncol="+matrix[0].length+", byrow=TRUE)";
  131.     }
  132. }
  133.  
  134.  
  135. class TextConsole implements RMainLoopCallbacks{
  136.     public void rWriteConsole(Rengine re, String text, int oType) {
  137.         System.out.print(text);
  138.     }
  139.  
  140.     public void rBusy(Rengine re, int which) {
  141.         System.out.println("rBusy("+which+")");
  142.     }
  143.  
  144.     public String rReadConsole(Rengine re, String prompt, int addToHistory) {
  145.         System.out.print(prompt);
  146.         try {
  147.             BufferedReader br=new BufferedReader(new InputStreamReader(System.in));
  148.             String s=br.readLine();
  149.             return (s==null||s.length()==0)?s:s+"\n";
  150.         } catch (Exception e) {
  151.             System.out.println("jriReadConsole exception: "+e.getMessage());
  152.         }
  153.         return null;
  154.     }
  155.  
  156.     public void rShowMessage(Rengine re, String message) {
  157.         System.out.println("rShowMessage \""+message+"\"");
  158.     }
  159.  
  160.     public String rChooseFile(Rengine re, int newFile) {
  161.         FileDialog fd = new FileDialog(new Frame(), (newFile==0)?"Select a file":"Select a new file", (newFile==0)?FileDialog.LOAD:FileDialog.SAVE);
  162.         fd.show();
  163.         String res=null;
  164.         if (fd.getDirectory()!=null) res=fd.getDirectory();
  165.         if (fd.getFile()!=null) res=(res==null)?fd.getFile():(res+fd.getFile());
  166.         return res;
  167.     }
  168.  
  169.     public void   rFlushConsole (Rengine re) {
  170.     }
  171.  
  172.     public void   rLoadHistory  (Rengine re, String filename) {
  173.     }          
  174.  
  175.     public void   rSaveHistory  (Rengine re, String filename) {
  176.     }          
  177. }
RAW Paste Data