Advertisement
cesarsouza

Reproducing Lindsay's tutorial using Accord.NET

May 4th, 2012
346
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 4.31 KB | None | 0 0
  1.     // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis"
  2.     // using the framework's default method. The tutorial can be found online
  3.     // at http://www.sccg.sk/~haladova/principal_components.pdf
  4.  
  5.     // Step 1. Get some data
  6.     // ---------------------
  7.  
  8.     double[,] data =
  9.     {
  10.         { 2.5,  2.4 },
  11.         { 0.5,  0.7 },
  12.         { 2.2,  2.9 },
  13.         { 1.9,  2.2 },
  14.         { 3.1,  3.0 },
  15.         { 2.3,  2.7 },
  16.         { 2.0,  1.6 },
  17.         { 1.0,  1.1 },
  18.         { 1.5,  1.6 },
  19.         { 1.1,  0.9 }
  20.     };
  21.  
  22.  
  23.     // Step 2. Subtract the mean
  24.     // -------------------------
  25.     //   Note: The framework does this automatically. By default, the framework
  26.     //   uses the "Center" method, which only subtracts the mean. However, it is
  27.     //   also possible to remove the mean *and* divide by the standard deviation
  28.     //   (thus performing the correlation method) by specifying "Standardize"
  29.     //   instead of "Center" as the AnalysisMethod.
  30.  
  31.     AnalysisMethod method = AnalysisMethod.Center; // AnalysisMethod.Standardize
  32.  
  33.  
  34.     // Step 3. Compute the covariance matrix
  35.     // -------------------------------------
  36.     //   Note: Accord.NET does not need to compute the covariance
  37.     //   matrix in order to compute PCA. The framework uses the SVD
  38.     //   method which is more numerically stable, but may require
  39.     //   more processing or memory. In order to replicate the tutorial
  40.     //   using covariance matrices, please see the next unit test.
  41.  
  42.     // Create the analysis using the selected method
  43.     var pca = new PrincipalComponentAnalysis(data, method);
  44.  
  45.     // Compute it
  46.     pca.Compute();
  47.  
  48.  
  49.     // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
  50.     // -------------------------------------------------------------------------
  51.     //   Note: Since Accord.NET uses the SVD method rather than the Eigendecomposition
  52.     //   method, the Eigenvalues are not directly available. However, it is not the
  53.     //   Eigenvalues themselves which are important, but rather their proportion:
  54.  
  55.     // Those are the expected eigenvalues, in descending order:
  56.     double[] eigenvalues = { 1.28402771, 0.0490833989 };
  57.  
  58.     // And this will be their proportion:
  59.     double[] proportion = eigenvalues.Divide(eigenvalues.Sum());
  60.  
  61.     // Those are the expected eigenvectors,
  62.     // in descending order of eigenvalues:
  63.     double[,] eigenvectors =
  64.     {
  65.         { -0.677873399, -0.735178656 },
  66.         { -0.735178656,  0.677873399 }
  67.     };
  68.  
  69.     // Now, here is the place most users get confused. The fact is that
  70.     // the Eigenvalue decomposition (EVD) is not unique, and both the SVD
  71.     // and EVD routines used by the framework produces results which are
  72.     // numerically different from packages such as STATA or MATLAB, but
  73.     // those are correct.
  74.  
  75.     // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with
  76.     // a being a scalar) will also be an eigenvector. In the Lindsay case, the
  77.     // framework produces a first eigenvector with inverted signs. This is the same
  78.     // as considering a=-1 and taking a*v. The result is still correct.
  79.  
  80.     // Retrieve the first expected eigenvector
  81.     double[] v = eigenvectors.GetColumn(0);
  82.  
  83.     // Multiply by a scalar and store it back
  84.     eigenvectors.SetColumn(0, v.Multiply(-1));
  85.  
  86.     // Everything is alright (up to the 9 decimal places shown in the tutorial)
  87.     Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9));
  88.     Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9));
  89.  
  90.  
  91.     // Step 5. Deriving the new data set
  92.     // ---------------------------------
  93.  
  94.     double[,] actual = pca.Transform(data);
  95.  
  96.     // transformedData shown in pg. 18
  97.     double[,] expected = new double[,]
  98.     {
  99.         {  0.827970186, -0.175115307 },
  100.         { -1.77758033,   0.142857227 },
  101.         {  0.992197494,  0.384374989 },
  102.         {  0.274210416,  0.130417207 },
  103.         {  1.67580142,  -0.209498461 },
  104.         {  0.912949103,  0.175282444 },
  105.         { -0.099109437, -0.349824698 },
  106.         { -1.14457216,   0.046417258 },
  107.         { -0.438046137,  0.017764629 },
  108.         { -1.22382056,  -0.162675287 },
  109.     };
  110.  
  111.     // Everything is correct (up to 8 decimal places)
  112.     Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8));
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement