Advertisement
Guest User

Untitled

a guest
Mar 17th, 2011
158
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.87 KB | None | 0 0
  # Draw a random subset of read pairs from two paired-end FASTQ files.
  #
  # Requires bash (uses [[ ]], local, PIPESTATUS and $'\t').
  #
  # Settings (each may be overridden from the environment; the value shown
  # is the default):
  #   FQ1        first-mate FASTQ input             (1.fq)
  #   FQ2        second-mate FASTQ input            (2.fq)
  #   FQ1SUBSET  output file for first-mate subset  (1.rand.fq)
  #   FQ2SUBSET  output file for second-mate subset (2.rand.fq)
  #   N          number of random pairs to keep     (100)

  # Starting FASTQ files
  export FQ1="${FQ1:-1.fq}"
  export FQ2="${FQ2:-2.fq}"

  # The names of the random subsets you wish to create
  export FQ1SUBSET="${FQ1SUBSET:-1.rand.fq}"
  export FQ2SUBSET="${FQ2SUBSET:-2.rand.fq}"

  # How many random pairs do we want?
  export N="${N:-100}"

  #######################################
  # Write N randomly chosen read pairs from two FASTQ files to two new files.
  # Mates stay paired: record i of the first output and record i of the
  # second output come from the same position in the inputs.
  # Arguments:
  #   $1 - first-mate FASTQ input
  #   $2 - second-mate FASTQ input
  #   $3 - first-mate output (overwritten)
  #   $4 - second-mate output (overwritten)
  #   $5 - number of pairs to keep (non-negative integer); if the inputs hold
  #        fewer pairs, all of them are written in random order
  # Outputs:
  #   The two output files; diagnostics on stderr.
  # Returns:
  #   0 on success, 1 on bad arguments, unreadable or malformed input, or a
  #   failing pipeline stage (outputs are left empty in that case).
  # Notes:
  #   awk's srand() with no argument seeds from the time of day, so two runs
  #   started within the same second select the same pairs.
  #######################################
  subsample_fastq_pairs() {
    if (( $# != 5 )); then
      printf 'usage: subsample_fastq_pairs FQ1 FQ2 OUT1 OUT2 N\n' >&2
      return 1
    fi

    local fq1=$1 fq2=$2 out1=$3 out2=$4 n=$5
    local tab=$'\t'
    local f
    local -a rc

    for f in "$fq1" "$fq2"; do
      if [[ ! -r "$f" ]]; then
        printf 'error: cannot read FASTQ file: %s\n' "$f" >&2
        return 1
      fi
    done

    if [[ ! "$n" =~ ^[0-9]+$ ]]; then
      printf 'error: N must be a non-negative integer, got: %s\n' "$n" >&2
      return 1
    fi

    # Start from empty outputs so a re-run never appends to an older subset.
    : > "$out1" || return 1
    : > "$out2" || return 1

    # Stage 1: paste the two FASTQ files so that the header, seqs, seps and
    #          quals of both mates sit next to one another (tab-separated).
    # Stage 2: "linearize" each 4-line record into one tab-separated line and
    #          prefix it with a fixed-width random key. Every getline is
    #          checked so a truncated file cannot emit stale fields from the
    #          previous record.
    # Stage 3: sort by the random key; LC_ALL=C plus the fixed-width key makes
    #          the byte-wise order equal to the numeric order.
    # Stage 4: keep the first N records and split them back into two FASTQ
    #          files. Fields are split on tabs only, so headers that contain
    #          spaces survive intact. The stage reads its whole input instead
    #          of quitting early, so no upstream stage is killed by SIGPIPE
    #          and every exit status below is meaningful.
    paste -- "$fq1" "$fq2" \
      | awk -F '\t' '
          BEGIN { srand() }
          # Tolerate blank lines between or after records.
          $0 == "\t" { next }
          {
            if (NF != 2 || $1 !~ /^@/ || $2 !~ /^@/) {
              print "error: expected two @-prefixed headers at pasted line " NR > "/dev/stderr"
              exit 1
            }
            header = $0
            if ((getline seqs) <= 0 || (getline sep) <= 0 || (getline quals) <= 0) {
              print "error: input ends in the middle of a FASTQ record" > "/dev/stderr"
              exit 1
            }
            printf "%.15f\t%s\t%s\t%s\t%s\n", rand(), header, seqs, sep, quals
          }' \
      | LC_ALL=C sort -t "$tab" -k1,1 \
      | FQ1_OUT="$out1" FQ2_OUT="$out2" awk -F '\t' -v n="$n" '
          BEGIN { OFS = "\n" }
          NR > n { next }
          {
            print $2, $4, $6, $8 >> ENVIRON["FQ1_OUT"]
            print $3, $5, $7, $9 >> ENVIRON["FQ2_OUT"]
          }'
    rc=("${PIPESTATUS[@]}")

    if (( rc[0] != 0 || rc[1] != 0 || rc[2] != 0 || rc[3] != 0 )); then
      printf 'error: FASTQ subsampling failed (paste=%s awk=%s sort=%s awk=%s)\n' \
        "${rc[0]}" "${rc[1]}" "${rc[2]}" "${rc[3]}" >&2
      # Do not leave a partial subset behind that could be mistaken for a result.
      : > "$out1"
      : > "$out2"
      return 1
    fi
    return 0
  }

  # Kept as the last command so the script's exit status is the function's.
  subsample_fastq_pairs "$FQ1" "$FQ2" "$FQ1SUBSET" "$FQ2SUBSET" "$N"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement