Advertisement
dsuveges

FSEC_process_v2.1.pl

Jul 3rd, 2013
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 3.81 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. # Function:
  4.    # Opens all files with .txt extension in the working directory
  5.    # The program collects data from the first channel only - that has the GFP fluorescence
  6.    # the GFP fluorescence is organized into columns named after the input ascii file.
  7.    # The first column is the elution time (in minutes)
  8.    # As the dataset is usually extremely dense, there is an option to restrict sampled data.
  9.    # the default value of the restriction is 1 meaning it keeps all datapoints.
  10.    # the data restriction can be set by the -p switch: setting -p to 2 means every second point will be kept
  11.  
  12. # usage:
  13.       # $> ./FSEC_process_v2.1.pl -p [points to keep] -o [OUTPUT file]
  14.  
  15. my $version = 'v.2.1'; # last modified 07.03.2013
  16.       # Code optimization
  17.       # User can set the name of the output file; by default, the name will be "FSEC_OUTFILE.csv"
  18.       # When specifying the output file, do not add an extension! It will automatically be .csv!
  19.       # .txt removed from the output column headers
  20.  
  21. # version v.2.0 05.21.2013
  22.       # Minor code optimization
  23.       # Code more robust
  24.  
  25. # version v.1.5 01.13.2012
  26.       # Instead of writing out many files, data is combined into one large table
  27.       # The number of points we want to keep can be adjusted with the -p switch
  28.  
  29. # version v.0.1 01.13.2012
  30.       # Batch file conversion - deals with all txt files in the working directory
  31.       # Outputfile is named after the ascii file, with the csv extension.  
  32.  
  33. use warnings;
  34. use strict;
  35. use Getopt::Std;
  36.  
  # ---------------------------------------------------------------------------
  # Main script
  #
  # 1. Parse the -p (keep every Nth point) and -o (output base name) switches.
  # 2. Collect every *.txt file in the working directory.
  # 3. Let files() load the first-channel readings of each file into
  #    %fluorescence_values (elution time => one value per input file).
  # 4. Write one CSV table: a "time" column followed by one column per file.
  # ---------------------------------------------------------------------------

  # Getopt::Std stores the switch values in these package variables.
  our ( $opt_p, $opt_o ) = ( "", "" );

  # getopts() (not getopt()) is the call that understands the "p:o:" spec;
  # it already warns about unknown switches, so only a usage hint is added.
  getopts('p:o:')
      or warn "usage: $0 [-p points_to_keep] [-o output_basename]\n";

  print "program version:\t $version\n";

  # By default every point is kept. -p must be a positive integer: anything
  # else would make the "every Nth point" filter in files() divide by zero
  # or behave unpredictably, so invalid values are reported and ignored.
  our $keep_point = 1;
  if ( defined $opt_p && length $opt_p ) {
      ( my $requested_points = $opt_p ) =~ s/\s//g;
      if ( $requested_points =~ /\A[0-9]+\z/ && $requested_points > 0 ) {
          $keep_point = $requested_points + 0;
      }
      else {
          warn "Ignoring invalid -p value '$opt_p' "
             . "(expected a positive integer); keeping every point.\n";
      }
  }
  print "Program keeps every:\t $keep_point points\n";

  # By default the output file name is FSEC_OUTFILE; ".csv" is always appended.
  our $outfilename = "FSEC_OUTFILE";
  if ( defined $opt_o && length $opt_o ) {
      ( my $requested_name = $opt_o ) =~ s/\s//g;
      $outfilename = $requested_name if length $requested_name;
  }
  print "Output file name:\t $outfilename.csv\n";

  # Gather the input files. Only plain files ending in ".txt" qualify, so the
  # ".csv" output of a previous run is never picked up as input. The list is
  # sorted because readdir() returns entries in no particular order and the
  # column order of the table should be reproducible.
  my $dir = ".";
  opendir( my $dir_handle, $dir )
      or die "Cannot read directory '$dir': $!\n";
  my @filelist = sort grep { /\.txt\z/ && -f "$dir/$_" } readdir $dir_handle;
  closedir $dir_handle;

  print "txt files in the folder: ", scalar(@filelist),"\n\n\n";

  # Open the output file before any processing so that an unwritable target
  # is reported immediately, together with the system error.
  open( my $out_handle, ">", "$outfilename.csv" )
      or die "Output file '$outfilename.csv' could not be opened: $!\n";

  # Filled by files(): elution time => array of readings, one per input file.
  our %fluorescence_values = ();

  my @column_names = ();    # header names, in the same order as the columns
  my %known_time   = ();    # time points that already have an aligned row

  foreach my $file (@filelist) {
      print "Processing: $file\n";

      # Number of columns that existed before this file was read.
      my $columns_before = scalar @column_names;

      files($file);

      # files() appends values positionally, so every row has to be brought
      # to exactly ($columns_before + 1) entries. Otherwise a time point
      # that is missing from one file shifts all later columns to the left.
      my $duplicate_times = 0;
      foreach my $time ( keys %fluorescence_values ) {
          my $row = $fluorescence_values{$time};

          # Time point first seen in this file: earlier files get empty cells.
          if ( !$known_time{$time} ) {
              unshift @{$row}, ('') x $columns_before;
              $known_time{$time} = 1;
          }

          # Time point absent from this file: this file gets an empty cell.
          push @{$row}, '' while @{$row} <= $columns_before;

          # Same time reported more than once by this file: keep the first
          # reading so that the row stays aligned with the header.
          if ( $#{$row} > $columns_before ) {
              $#{$row} = $columns_before;
              $duplicate_times++;
          }
      }
      warn "$file: $duplicate_times repeated time point(s); "
         . "only the first reading of each was kept.\n"
          if $duplicate_times;

      # Column header: file name with whitespace replaced by "_" and the
      # ".txt" extension removed.
      ( my $column_name = $file ) =~ s/\s/_/g;
      $column_name =~ s/\.txt\z//;
      push @column_names, $column_name;
  }

  # Header and data rows are both built with join(), so every line of the
  # table has the same number of fields (no stray trailing comma).
  print {$out_handle} join( ",", "time", @column_names ), "\n";

  foreach my $time ( sort { $a <=> $b } keys %fluorescence_values ) {
      print {$out_handle}
          join( ",", $time, @{ $fluorescence_values{$time} } ), "\n";
  }

  # Buffered write errors (e.g. a full disk) only surface at close time.
  close $out_handle
      or die "Error while writing '$outfilename.csv': $!\n";
  99.  
  100.  
  # files($filename)
  #
  # Reads one exported chromatogram file and appends the readings of its
  # first channel to the package hash %fluorescence_values, keyed by the
  # elution time found in the first column.
  #
  # Parameters:
  #   $filename - path of the text file to read.
  #
  # Uses the package variables:
  #   $keep_point          - only every $keep_point-th data line is stored.
  #   %fluorescence_values - time => array ref; the reading is pushed onto it.
  #
  # Reading stops at the "LC Status Trace(Pump A Pressure)" line, because the
  # data after it belongs to another channel.
  #
  # A file that cannot be opened is reported with a warning and skipped, so
  # that one unreadable file does not abort the whole batch.
  #
  # Returns nothing.
  sub files {
      my ($filename) = @_;

      my $in_handle;
      if ( !open( $in_handle, "<", $filename ) ) {
          warn "Skipping '$filename': cannot open for reading: $!\n";
          return;
      }

      my $data_line_count = 0;

      # Read line by line instead of loading the whole file into a list.
      while ( my $line = <$in_handle> ) {

          # A data line is "<time> <value>" separated by whitespace. The value
          # may carry a '-' sign (negative emission). The line ending is
          # matched with \s*$ so that LF, CRLF and a final line without any
          # newline are all accepted.
          if ( $line =~ /^([0-9.]+)\s+([0-9.\-]+)\s*$/ ) {
              my ( $time, $value ) = ( $1, $2 );
              $data_line_count++;

              # Down-sampling filter: keep the line only when its running
              # number is a whole multiple of $keep_point.
              if ( $data_line_count / $keep_point
                  == int( $data_line_count / $keep_point ) )
              {
                  push @{ $fluorescence_values{$time} }, $value;
              }
          }

          # Start of the next channel: stop collecting, but leave the loop
          # with "last" so that the handle is still closed below.
          elsif ( $line =~ /LC Status Trace\(Pump A Pressure\)/ ) {
              last;
          }
      }

      close $in_handle;
      return;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement