Advertisement
GenomicsBootCamp

Simple PCA with PLINK

Jun 22nd, 2021
1,873
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Simple PCA analysis with PLINK
  2.  
  3. # Clear workspace
  4. rm(list = ls())
  5. # Set working directory
  6. setwd("d:/analysis/2020_GenomicsBootCamp_Demo/")
  7. library(tidyverse)
  8.  
  9. # Read in the data - ADAPTmap
  10. goats <- read_tsv("ADAPTmap_genotypeTOP_20160222_full.fam", col_names = F)
  11.  
  12. # select goat breeds
  13. selectedAnimals <- goats %>%
  14.   filter(X1 == "TOG" | X1 == "RAN"| X1 == "TED") %>%
  15.   write_delim("selectedAnimals.txt", col_names = F)
  16.  
  17. # perform Quality control
  18. system(str_c("plink --bfile ADAPTmap_genotypeTOP_20160222_full --chr-set 29 --autosome ",
  19.              "--keep selectedAnimals.txt --nonfounders ",
  20.              "--geno 0.1 --mind 0.1 --maf 0.05 ",
  21.              "--make-bed --out afterQC"))
  22.  
  23. # do the PCA with PLINK
  24. system("plink --bfile afterQC --chr-set 29 --pca --out plinkPCA")
  25.  
  26. ###
  27. # Visualize PCA results
  28. ###
  29.  
  30. # read in result files
  31. eigenValues <- read_delim("plinkPCA.eigenval", delim = " ", col_names = F)
  32. eigenVectors <- read_delim("plinkPCA.eigenvec", delim = " ", col_names = F)
  33.  
  34. ## Proportion of variation captured by each vector
  35. eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
  36.  
  37. # PCA plot
  38. ggplot(data = eigenVectors) +
  39.   geom_point(mapping = aes(x = X3, y = X4, color = X1, shape = X1), size = 3, show.legend = TRUE ) +
  40.   geom_hline(yintercept = 0, linetype="dotted") +
  41.   geom_vline(xintercept = 0, linetype="dotted") +
  42.   labs(title = "PCA of selected goat populations",
  43.       x = paste0("Principal component 1 (",eigen_percent[1,1]," %)"),
  44.       y = paste0("Principal component 2 (",eigen_percent[2,1]," %)"),
  45.       colour = "Goat breeds", shape = "Goat breeds") +
  46.   theme_minimal()
  47.  
Advertisement
RAW Paste Data Copied
Advertisement