Advertisement
cesarsouza

Lindsay's tutorial in Accord.NET with covariance matrices

May 4th, 2012
245
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 3.66 KB | None | 0 0
  1.     // Reproducing Lindsay Smith's "Tutorial on Principal Component Analysis"
  2.     // using the paper's original method. The tutorial can be found online
  3.     // at http://www.sccg.sk/~haladova/principal_components.pdf
  4.  
  5.     // Step 1. Get some data
  6.     // ---------------------
  7.  
  8.     double[,] data =
  9.     {
  10.         { 2.5,  2.4 },
  11.         { 0.5,  0.7 },
  12.         { 2.2,  2.9 },
  13.         { 1.9,  2.2 },
  14.         { 3.1,  3.0 },
  15.         { 2.3,  2.7 },
  16.         { 2.0,  1.6 },
  17.         { 1.0,  1.1 },
  18.         { 1.5,  1.6 },
  19.         { 1.1,  0.9 }
  20.     };
  21.  
  22.  
  23.     // Step 2. Subtract the mean
  24.     // -------------------------
  25.     //   Note: The framework does this automatically
  26.     //   when computing the covariance matrix. In this
  27.     //   step we will only compute the mean vector.
  28.  
  29.     double[] mean = Accord.Statistics.Tools.Mean(data);
  30.  
  31.  
  32.     // Step 3. Compute the covariance matrix
  33.     // -------------------------------------
  34.  
  35.     double[,] covariance = Accord.Statistics.Tools.Covariance(data, mean);
  36.  
  37.     // Create the analysis using the covariance matrix
  38.     var pca = PrincipalComponentAnalysis.FromCovarianceMatrix(mean, covariance);
  39.  
  40.     // Compute it
  41.     pca.Compute();
  42.  
  43.  
  44.     // Step 4. Compute the eigenvectors and eigenvalues of the covariance matrix
  45.     //--------------------------------------------------------------------------
  46.  
  47.     // Those are the expected eigenvalues, in descending order:
  48.     double[] eigenvalues = { 1.28402771, 0.0490833989 };
  49.  
  50.     // And this will be their proportion:
  51.     double[] proportion = eigenvalues.Divide(eigenvalues.Sum());
  52.  
  53.     // Those are the expected eigenvectors,
  54.     // in descending order of eigenvalues:
  55.     double[,] eigenvectors =
  56.     {
  57.         { -0.677873399, -0.735178656 },
  58.         { -0.735178656,  0.677873399 }
  59.     };
  60.  
  61.     // Now, here is the place most users get confused. The fact is that
  62.     // the Eigenvalue decomposition (EVD) is not unique, and both the SVD
  63.     // and EVD routines used by the framework produces results which are
  64.     // numerically different from packages such as STATA or MATLAB, but
  65.     // those are correct.
  66.  
  67.     // If v is an eigenvector, a multiple of this eigenvector (such as a*v, with
  68.     // a being a scalar) will also be an eigenvector. In the Lindsay case, the
  69.     // framework produces a first eigenvector with inverted signs. This is the same
  70.     // as considering a=-1 and taking a*v. The result is still correct.
  71.  
  72.     // Retrieve the first expected eigenvector
  73.     double[] v = eigenvectors.GetColumn(0);
  74.  
  75.     // Multiply by a scalar and store it back
  76.     eigenvectors.SetColumn(0, v.Multiply(-1));
  77.  
  78.     // Everything is alright (up to the 9 decimal places shown in the tutorial)
  79.     Assert.IsTrue(eigenvectors.IsEqual(pca.ComponentMatrix, threshold: 1e-9));
  80.     Assert.IsTrue(proportion.IsEqual(pca.ComponentProportions, threshold: 1e-9));
  81.     Assert.IsTrue(eigenvalues.IsEqual(pca.Eigenvalues, threshold: 1e-8));
  82.  
  83.  
  84.     // Step 5. Deriving the new data set
  85.     // ---------------------------------
  86.  
  87.     double[,] actual = pca.Transform(data);
  88.  
  89.     // transformedData shown in pg. 18
  90.     double[,] expected = new double[,]
  91.     {
  92.         {  0.827970186, -0.175115307 },
  93.         { -1.77758033,   0.142857227 },
  94.         {  0.992197494,  0.384374989 },
  95.         {  0.274210416,  0.130417207 },
  96.         {  1.67580142,  -0.209498461 },
  97.         {  0.912949103,  0.175282444 },
  98.         { -0.099109437, -0.349824698 },
  99.         { -1.14457216,   0.046417258 },
  100.         { -0.438046137,  0.017764629 },
  101.         { -1.22382056,  -0.162675287 },
  102.     };
  103.  
  104.     // Everything is correct (up to 8 decimal places)
  105.     Assert.IsTrue(expected.IsEqual(actual, threshold: 1e-8));
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement