Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;

# Sliding-window GC content: reads a FASTA file, joins all sequence lines
# into one string, then prints the %GC of each window of --winlen bases,
# advancing --increment bases at a time.

# All sequence text from the FASTA file, concatenated into a single string.
# (This must be a scalar: the read loop appends to it with .= and
# sliding_window() takes substr() slices of it.)
my $data = '';

# Command-line options. The empty string means "not supplied", which is
# what parseArgs() tests for.
my $fasta     = "";
my $winlen    = "";
my $increment = "";

parseArgs();    # subroutine to call GetOptions

# Open the fasta file and read it in, passing only sequence lines to $data.
open( my $fasta_fh, '<', $fasta )
    or die "Doh! Can't open $fasta: $!\n";
while ( my $line = <$fasta_fh> ) {
    chomp $line;
    next if $line =~ /^>/;       # FASTA header line
    next if $line =~ /^\s*$/;    # blank / whitespace-only line
    next if $line =~ /^\s*#/;    # comment line
    $data .= $line;
}
close $fasta_fh;

# Walk the sequence window by window and print the GC content of each one.
sliding_window();
exit;
###########################SUBROUTINES##################################
# parseArgs
# Reads --fasta, --winlen and --increment from @ARGV with GetOptions and
# stores them in the file-level $fasta, $winlen and $increment variables.
# Takes no arguments.
# Dies with the usage message when the options cannot be parsed, when a
# mandatory option is missing, or when --winlen / --increment is not a
# positive integer.
sub parseArgs {
    # Message to print if mandatory variables not declared
    my $usage = "\nUsage: $0 --fasta /path/to/*.fasta --winlen <integer> --increment <integer> [options]
Mandatory options:
--fasta - path to the first input file (fasta) in .fasta or .fsa format
--winlen - specify size of the sliding window
--increment - specify increment length for the sliding window
\n";

    # GetOptions needs a *reference* to each destination variable; passing
    # the variable itself is rejected as an invalid option linkage.
    my $options = GetOptions(
        'fasta=s{1,1}'     => \$fasta,
        'winlen=i{1,1}'    => \$winlen,
        'increment=i{1,1}' => \$increment,
    );
    if ( !$options ) {
        die "\n\nDoh!: could not parse the command-line options\n\n$usage\n";
    }

    if ( $fasta eq "" ) {
        die "\n\nDoh!: fasta input file must be specified!\n\n$usage\n";
    }
    if ( $winlen eq "" ) {
        die "\n\nDoh!: length of sliding window must be specified\n\n$usage\n";
    }
    if ( $increment eq "" ) {
        die "\n\nDoh!: increment length must be specified\n\n$usage\n";
    }

    # '=i' also accepts zero and negative integers. A window of zero bases
    # has no GC content, and a non-positive increment never advances.
    if ( $winlen <= 0 ) {
        die "\n\nDoh!: length of sliding window must be a positive integer\n\n$usage\n";
    }
    if ( $increment <= 0 ) {
        die "\n\nDoh!: increment length must be a positive integer\n\n$usage\n";
    }

    return;
}
#########################################################################
# sliding_window
# Slides a window of $winlen bases along $data, moving $increment bases
# per step. For every window it prints the 1-based start position and a
# tab, then calls getGC() to print that window's %GC.
# Takes no arguments; reads the file-level $data, $winlen and $increment.
# Dies if $winlen or $increment is not a positive number, because a
# non-positive increment would never reach the end of the sequence.
sub sliding_window {
    if ( !$winlen || $winlen <= 0 ) {
        die "Doh! length of sliding window must be a positive integer\n";
    }
    if ( !$increment || $increment <= 0 ) {
        die "Doh! increment length must be a positive integer\n";
    }

    my $total_length = length $data;
    for ( my $start = 0; $start < $total_length; $start += $increment ) {
        # substr() returns fewer than $winlen characters when the window
        # runs past the end of $data, so the last window may be short.
        my $window = substr( $data, $start, $winlen );

        # Report every window in the same 1-based coordinate system.
        my $position = $start + 1;
        print "$position\t";

        # calls the subroutine getGC to calculate %GC content
        getGC($window);
    }
    return;
}
#########################################################################
# getGC
# Prints the GC content of one window as a percentage with two decimal
# places, followed by a newline (for example "50.00%").
#   $_[0] - the window's sequence as a string of nucleotide letters
# G and C are counted in either case, so soft-masked (lowercase) sequence
# is included. The percentage is taken over the actual length of the
# string passed in, so a short final window is not diluted. An empty or
# undefined window prints 0.00%.
sub getGC {
    my ($sequence) = @_;
    $sequence = '' unless defined $sequence;

    my $window_length = length $sequence;

    # tr/// with an empty replacement list counts matching characters
    # without modifying the string.
    my $gc_count = ( $sequence =~ tr/GCgc// );

    my $percent_gc = $window_length
        ? ( $gc_count / $window_length ) * 100
        : 0;

    printf( "%.2f%%\n", $percent_gc );
    return;
}
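#########################################################################
# Fragments that followed the script in the original paste, kept here as
# comments for reference. They appear to be statements from another
# version of the code, together with the warnings Perl reported for it:
#   push ( @data, $seq );
#   @nucs = split ( //, $_ );
#   substr outside of string at ./slidingWindowGC.pl line 76.
#   76 my $sequence = substr( @nucs, $increment, $winlen );
#   Use of uninitialized value $sequence in print at ./slidingWindowGC.pl line 87.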
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement