Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Simple PCA analysis with PLINK
- # Clear workspace
- rm(list = ls())
- # Set working directory
- setwd("d:/analysis/2020_GenomicsBootCamp_Demo/")
- library(tidyverse)
- # Read in the data - ADAPTmap
- goats <- read_tsv("ADAPTmap_genotypeTOP_20160222_full.fam", col_names = F)
- # select goat breeds
- selectedAnimals <- goats %>%
- filter(X1 == "TOG" | X1 == "RAN"| X1 == "TED") %>%
- write_delim("selectedAnimals.txt", col_names = F)
- # perform Quality control
- system(str_c("plink --bfile ADAPTmap_genotypeTOP_20160222_full --chr-set 29 --autosome ",
- "--keep selectedAnimals.txt --nonfounders ",
- "--geno 0.1 --mind 0.1 --maf 0.05 ",
- "--make-bed --out afterQC"))
- # do the PCA with PLINK
- system("plink --bfile afterQC --chr-set 29 --pca --out plinkPCA")
- ###
- # Visualize PCA results
- ###
- # read in result files
- eigenValues <- read_delim("plinkPCA.eigenval", delim = " ", col_names = F)
- eigenVectors <- read_delim("plinkPCA.eigenvec", delim = " ", col_names = F)
- ## Proportion of variation captured by each vector
- eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
- # PCA plot
- ggplot(data = eigenVectors) +
- geom_point(mapping = aes(x = X3, y = X4, color = X1, shape = X1), size = 3, show.legend = TRUE ) +
- geom_hline(yintercept = 0, linetype="dotted") +
- geom_vline(xintercept = 0, linetype="dotted") +
- labs(title = "PCA of selected goat populations",
- x = paste0("Principal component 1 (",eigen_percent[1,1]," %)"),
- y = paste0("Principal component 2 (",eigen_percent[2,1]," %)"),
- colour = "Goat breeds", shape = "Goat breeds") +
- theme_minimal()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement