Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Data check and preparation to examine crossbreds with the Structure software
- # See video on the Genomics Boot Camp YouTube channel
- # Prerequisites:
- # 1) Download and install Structure https://web.stanford.edu/group/pritchardlab/structure.html
- # 2) Get data https://datadryad.org/stash/dataset/doi:10.5061/dryad.v8g21pt
- # Clear workspace and load packages
- rm(list = ls())
- library(tidyverse)
- # Set the location of the working directory
- setwd("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
- # perform Quality control
- system(str_c("plink --bfile ADAPTmap_genotypeTOP_20160222_full --chr-set 29 --autosome ",
- "--keep animalsToKeep.txt --nonfounders ",
- "--geno 0.1 --mind 0.1 --maf 0.05 ",
- "--make-bed --out afterQC"))
- #################
- # Check PCA plot
- #################
- system("plink --bfile afterQC --chr-set 29 --pca --out plinkPCA")
- ###
- # Visualize PCA results
- ###
- # read in result files
- eigenValues <- read_delim("plinkPCA.eigenval", delim = " ", col_names = F)
- eigenVectors <- read_delim("plinkPCA.eigenvec", delim = " ", col_names = F)
- ## Proportion of variation captured by each vector
- eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
- # PCA plot
- ggplot(data = eigenVectors) +
- geom_point(mapping = aes(x = X3, y = X4, color = X1, shape = X1), size = 3, show.legend = TRUE ) +
- geom_hline(yintercept = 0, linetype="dotted") +
- geom_vline(xintercept = 0, linetype="dotted") +
- labs(title = "PCA of selected goat populations",
- x = paste0("Principal component 1 (",eigen_percent[1,1]," %)"),
- y = paste0("Principal component 2 (",eigen_percent[2,1]," %)"),
- colour = "Goat breeds", shape = "Goat breeds") +
- theme_minimal()
- # prepare file for the Structure software
- system("plink --bfile afterQC --chr-set 29 --recode structure --out forStructure")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement