Guest User

Untitled

a guest
Dec 12th, 2018
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.52 KB | None | 0 0
  1. ### ----------------------------------------------------
  2. ### Script for sorting and comparing multi-sample report
  3. ### values from Avadis NGS
  4. ###
  5. ### Written by Niklas Malmqvist 2012
  6. ### ----------------------------------------------------
  7.  
  8. #!/usr/bin/perl
  9. use strict;
  10. use warnings;
  11. use Getopt::Long;
  12.  
  13. ## Declare various variables;
  14. my $sortColumn;
  15. my $sortDescending = 0;
  16. my $INPUT;
  17. my @splitline = ();
  18. my $noTotalHighScores = 0;
  19. my $minScore;
  20. my $offset;
  21. my $noSamples;
  22. my $noSampleHighScores = 0;
  23. my $minSampleCount = 0;
  24.  
  25. GetOptions( "col=i" => \$sortColumn,
  26. "r" => \$sortDescending,
  27. "s=n" => \$noSamples, ## TODO: Figure out how to automatically read nbr of samples
  28. "m=n" => \$minScore,
  29. "c=n" => \$minSampleCount,
  30. "i=s" => \$INPUT);
  31.  
  32. ## Options input check
  33. if ($sortDescending) { $sortDescending = "r"; } else { $sortDescending = ""; }
  34. unless ($sortColumn && $INPUT && $noSamples && $minScore && $minSampleCount)
  35. { print "$0: Missing mandatory input arguments!\n"; exit 1; }
  36.  
  37. ## Open the input file
  38. open INFILE, "<", $INPUT or die "Can't open the input file!";
  39.  
  40. ## Open the output file
  41. open OUTFILE , ">", $INPUT."-highscores" or die "Can't open the output file!";
  42.  
  43. ## Sort the file and filter out rows with "Sample count" below treshold
  44. system("head -n1 $INPUT > tmpPart1.csv");
  45. system("tail -n+2 $INPUT | sort -k".$sortColumn."n".$sortDescending." > tmpPart2.csv");
  46. system("awk \'\$7 >= $minSampleCount\' tmpPart2.csv > tmpPart3.csv");
  47. system("cat tmpPart1.csv tmpPart3.csv > $INPUT.sorted");
  48. system("rm tmpPart1.csv tmpPart2.csv tmpPart3.csv");
  49.  
  50. close (INFILE);
  51.  
  52. ## Go through the sorted file line-by-line and check for scores
  53. open INFILE_SORTED, "<", $INPUT.".sorted" or die "Can't open the sorted input file!";
  54. my $lineCount = 1;
  55.  
  56. while (my $line = <INFILE_SORTED>) {
  57. # Skip first line (header)
  58. if ($lineCount == 1) {
  59. ## Print the header to the output
  60. print OUTFILE $line;
  61. }
  62. else {
  63.  
  64. # The score for the first sample is stored in column 14, next sample in column 20 and so on
  65. # This is handled by $offset
  66. $offset = 14;
  67. @splitline = split('\t', $line);
  68.  
  69. $noSampleHighScores = 0;
  70. ## Check the scores of all the samples in the report
  71. for (1 .. $noSamples) {
  72.  
  73. if ($splitline[$offset-1] && ($splitline[$offset-1] >= $minScore)) {
  74. ## Score threshold OK
  75. $noSampleHighScores++;
  76. }
  77. $offset += 6;
  78. }
  79.  
  80. if ($noSampleHighScores == $noSamples) {
  81. $noTotalHighScores++;
  82. print OUTFILE $line;
  83. }
  84. }
  85. $lineCount++;
  86. }
  87.  
  88. print "Done!\nSamples with score > threshold: $noTotalHighScores\n";
  89.  
  90. close (INFILE_SORTED);
  91. close (OUTFILE);
Add Comment
Please, Sign In to add comment