Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2014
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.88 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. use strict;
  4. use warnings;
  5. use Getopt::Long;
  6.  
  # File-scoped state shared with the subroutines below.
  # $data accumulates the sequence text as one string; it has to be a scalar
  # because it is built with .= here and read with substr()/length() in
  # sliding_window().
  my $data = '';

  # Command-line options. The empty string means "not supplied"; that is
  # what parseArgs() tests for.
  my $fasta     = "";
  my $winlen    = "";
  my $increment = "";

  parseArgs();    # subroutine to call GetOptions

  # Open the fasta file and read it line by line. Header lines (">"),
  # blank lines and "#" comment lines are skipped; every other line is
  # appended to $data.
  open( my $fasta_fh, '<', $fasta )
      or die "Doh! Can't open $fasta: $!\n";
  while ( my $seq = <$fasta_fh> ) {
      chomp $seq;

      next if $seq =~ /^>/;
      next if $seq =~ /^\s*$/;
      next if $seq =~ /^\s*#/;

      $data .= $seq;
  }
  close $fasta_fh;

  # Slide a window along $data and print the GC content of each window.
  sliding_window();

  exit;
  42.  
  43.  
  44. ###########################SUBROUTINES##################################
  45.  
  # Parse the command line into the file-scoped $fasta, $winlen and
  # $increment.
  #
  # Takes no arguments and returns nothing. Dies with an explanation and the
  # usage text when an option cannot be parsed, when a mandatory option is
  # missing, or when --winlen / --increment is not a positive integer.
  sub parseArgs {

      # Message to print if mandatory variables not declared
      my $usage = join "\n",
          "",
          "Usage: $0 --fasta /path/to/*.fasta --winlen <integer> --increment <integer> [options]",
          "",
          "Mandatory options:",
          "--fasta - path to the first input file (fasta) in .fasta or .fsa format",
          "--winlen - specify size of the sliding window",
          "--increment - specify increment length for the sliding window",
          "",
          "";

      # GetOptions needs *references* to the destination variables; given
      # plain values it has nowhere to store what it parsed.
      GetOptions(
          'fasta=s{1,1}'     => \$fasta,
          'winlen=i{1,1}'    => \$winlen,
          'increment=i{1,1}' => \$increment,
      ) or die "\n\nDoh!: could not parse the command line\n\n$usage\n";

      if ( $fasta eq "" ) {
          die "\n\nDoh!: fasta input file must be specified!\n\n$usage\n";
      }
      if ( $winlen eq "" ) {
          die "\n\nDoh!: length of sliding window must be specified\n\n$usage\n";
      }
      if ( $increment eq "" ) {
          die "\n\nDoh!: increment length must be specified\n\n$usage\n";
      }

      # The "=i" option type also accepts zero and negative numbers. A window
      # of that size has nothing to measure, and a step of that size never
      # moves the window forward.
      if ( $winlen <= 0 ) {
          die "\n\nDoh!: length of sliding window must be a positive integer\n\n$usage\n";
      }
      if ( $increment <= 0 ) {
          die "\n\nDoh!: increment length must be a positive integer\n\n$usage\n";
      }

      return;
  }
  68.  
  69. #########################################################################
  70.  
  # Walk along the file-scoped $data in steps of $increment. For each window
  # of up to $winlen characters, print the window's 1-based start position
  # and a tab, then call getGC() to print the window's %GC on the same line.
  #
  # Takes no arguments and returns nothing. Dies if $increment or $winlen is
  # not greater than zero.
  sub sliding_window {

      # A step that is not positive would never advance $start, so the loop
      # below would not terminate.
      die "Doh!: increment length must be a positive integer\n"
          unless $increment > 0;
      die "Doh!: length of sliding window must be a positive integer\n"
          unless $winlen > 0;

      my $total = length $data;

      for ( my $start = 0; $start < $total; $start += $increment ) {

          # Near the end of $data fewer than $winlen characters may remain;
          # substr() then returns just what is left.
          my $window = substr( $data, $start, $winlen );

          # Report every window with the same 1-based coordinate.
          print $start + 1, "\t";

          # calls the subroutine getGC to calculate %GC content
          getGC($window);
      }

      return;
  }
  89.  
  90. #########################################################################
  91.  
  # Print the GC content of a sequence as a percentage with two decimals,
  # followed by a newline (for example "50.00%").
  #
  #   $_[0]  the sequence string to measure
  #
  # G and C are counted in either case. The percentage is taken over the
  # length of the string actually passed in, so a window shorter than the
  # nominal window size is not understated. An empty or undefined sequence
  # prints 0.00%. Returns nothing.
  sub getGC {
      my ($sequence) = @_;
      $sequence = '' unless defined $sequence;

      my $length = length $sequence;

      # tr/// with an empty replacement list counts the matching characters
      # without modifying the string.
      my $GC_counter = ( $sequence =~ tr/GCgc// );

      my $percent = $length ? ( $GC_counter / $length ) * 100 : 0;

      printf( "%.2f%%\n", $percent );
      return;
  }
  105.  
  106. #########################################################################
  107.  
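  # Notes kept from the original paste. These fragments and warnings appear
  # to come from a different version of this script (the quoted line 76 does
  # not match the code above) and are not part of the program:
  #
  #   push ( @data, $seq );
  #   @nucs = split ( //, $_ );
  #
  #   substr outside of string at ./slidingWindowGC.pl line 76.
  #   76 my $sequence = substr( @nucs, $increment, $winlen );
  #
  #   Use of uninitialized value $sequence in print at ./slidingWindowGC.pl line 87.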
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement