# Structure tutorial

Oct 12th, 2021
569
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. # Data check and preparation to examine crossbreds with the Structure software
2. # See video on the Genomics Boot Camp YouTube channel
3.
4. # Prerequisites:
7.
8.
9. # Start from an empty workspace, then load the tidyverse (it supplies the ggplot() call used for the PCA plot further down)
10. rm(list = ls())  # removes every object that ls() reports in the current environment
11. library(tidyverse)
12.
13. # Set the working directory. Replace the X placeholder with the folder that holds your data: the plink calls in this script use relative file names (animalsToKeep.txt, afterQC, forStructure)
14. setwd("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
15.
16.
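17. # Perform quality control (NOTE: paste line 18, which opens the call continued on the next three lines, is missing from this copy)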
19.              "--keep animalsToKeep.txt --nonfounders ",
20.              "--geno 0.1 --mind 0.1 --maf 0.05 ",
21.              "--make-bed --out afterQC"))
22.
23.
24. #################
25. # Check PCA plot
26. #################
27.
29.
30. ###
31. # Visualize PCA results
32. ###
33.
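34. # Read in result files (NOTE: paste lines 35-36 are missing from this copy; presumably they created the eigenValues and eigenVectors objects used below)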
37.
38. ## Percentage of the summed eigenvalues that each eigenvalue accounts for, rounded to 2 decimals (eigenValues is not defined in the visible part of this script)
39. eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
40.
41. # PCA plot: scatter of columns X3 (labelled PC1) against X4 (labelled PC2) of eigenVectors; colour and shape both follow column X1, which the legend titles "Goat breeds"
42. ggplot(data = eigenVectors) +
43.   geom_point(mapping = aes(x = X3, y = X4, color = X1, shape = X1), size = 3, show.legend = TRUE ) +
44.   geom_hline(yintercept = 0, linetype="dotted") +  # dotted reference line at y = 0
45.   geom_vline(xintercept = 0, linetype="dotted") +  # dotted reference line at x = 0
46.   labs(title = "PCA of selected goat populations",
47.        x = paste0("Principal component 1 (",eigen_percent[1,1]," %)"),  # axis label shows row 1, column 1 of eigen_percent
48.        y = paste0("Principal component 2 (",eigen_percent[2,1]," %)"),  # axis label shows row 2, column 1 of eigen_percent
49.        colour = "Goat breeds", shape = "Goat breeds") +  # same title for both legends
50.   theme_minimal()
51.
52.
53. # Prepare the input file for the Structure software: run plink on the afterQC fileset and recode it in Structure format under the output prefix forStructure (--chr-set 29 declares 29 autosome pairs; see the PLINK docs)
54. system("plink --bfile afterQC --chr-set 29 --recode structure --out forStructure")
55.
56.
57.
58.
59.
60.
61.