Advertisement
GenomicsBootCamp

Structure tutorial

Oct 12th, 2021
569
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Data check and preparation to examine crossbreds with the Structure software
  2. # See video on the Genomics Boot Camp YouTube channel
  3.  
  4. # Prerequisites:
  5. # 1) Download and install Structure https://web.stanford.edu/group/pritchardlab/structure.html
  6. # 2) Get data https://datadryad.org/stash/dataset/doi:10.5061/dryad.v8g21pt
  7.  
  8.  
  9. # Clear workspace and load packages
  10. rm(list = ls())
  11. library(tidyverse)
  12.  
  13. # Set the location of the working directory
  14. setwd("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
  15.  
  16.  
  17. # perform Quality control
  18. system(str_c("plink --bfile ADAPTmap_genotypeTOP_20160222_full --chr-set 29 --autosome ",
  19.              "--keep animalsToKeep.txt --nonfounders ",
  20.              "--geno 0.1 --mind 0.1 --maf 0.05 ",
  21.              "--make-bed --out afterQC"))
  22.  
  23.  
  24. #################
  25. # Check PCA plot
  26. #################
  27.  
  28. system("plink --bfile afterQC --chr-set 29 --pca --out plinkPCA")
  29.  
  30. ###
  31. # Visualize PCA results
  32. ###
  33.  
  34. # read in result files
  35. eigenValues <- read_delim("plinkPCA.eigenval", delim = " ", col_names = F)
  36. eigenVectors <- read_delim("plinkPCA.eigenvec", delim = " ", col_names = F)
  37.  
  38. ## Proportion of variation captured by each vector
  39. eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
  40.  
  41. # PCA plot
  42. ggplot(data = eigenVectors) +
  43.   geom_point(mapping = aes(x = X3, y = X4, color = X1, shape = X1), size = 3, show.legend = TRUE ) +
  44.   geom_hline(yintercept = 0, linetype="dotted") +
  45.   geom_vline(xintercept = 0, linetype="dotted") +
  46.   labs(title = "PCA of selected goat populations",
  47.        x = paste0("Principal component 1 (",eigen_percent[1,1]," %)"),
  48.        y = paste0("Principal component 2 (",eigen_percent[2,1]," %)"),
  49.        colour = "Goat breeds", shape = "Goat breeds") +
  50.   theme_minimal()
  51.  
  52.  
  53. # prepare file for the Structure software
  54. system("plink --bfile afterQC --chr-set 29 --recode structure --out forStructure")
  55.  
  56.  
  57.  
  58.  
  59.  
  60.  
  61.  
Advertisement
RAW Paste Data Copied
Advertisement