Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /data/results/
- TestFolder1/
- subfolder1/Variants/MD-14-11856_RNA_v2.vcf
- subfolder2/Variants/SU-16-16117_RNA_v2.vcf
- matrix.txt
- matrixkey.txt
- TestFolder2/
- subfolder1/Variants/SU-15-2542_v2.vcf
- subfolder2/Variants/SU-16-16117_v2.vcf
- matrix.txt
- matrixkey.txt
- Barcode SampleName
- barcode_003 SU-15-2542
- barcode-005 MD-14-11856
- barcode-002 SU-16-16117
#!/usr/bin/perl
# For every run folder directly under the results directory, load that
# folder's matrixkey.txt (a header line followed by whitespace-separated
# "barcode sample-name" rows) and check that every *.vcf file found below the
# folder has a file name containing one of the listed sample names.
# The script dies on the first VCF file that matches no sample name.
use warnings;
use strict;
use File::Copy qw(move);
use List::Util 'first';
use File::Find;
use File::Spec;
use Data::Dumper;
use File::Basename;
use File::Spec::Functions 'splitdir';

# read_matrix_key($path)
#   Reads a key file, discards its first (header) line and splits every
#   remaining line on whitespace.
#   Returns a list of array refs [ column 0, column 1, ... ], ordered so that
#   rows with the longest column-1 value (the sample name) come first; this
#   lets a "first match" search prefer the most specific sample name.
#   Lines with fewer than two columns (e.g. blank lines) are skipped.
#   Dies if the file cannot be opened or closed.
sub read_matrix_key {
    my ($path) = @_;

    open my $key, '<', $path or die "Cannot open $path: $!\n";
    <$key>;    # throw away the header line
    my @rows = grep { @{$_} >= 2 } map { [split] } <$key>;
    close $key or die "Cannot close $path: $!\n";

    my @by_name_length = sort { length($b->[1]) <=> length($a->[1]) } @rows;
    return @by_name_length;
}

# vcf_sample_id($path)
#   Returns the file name of $path without its ".vcf" extension, or undef when
#   $path does not end in ".vcf" (the dot is matched literally).
sub vcf_sample_id {
    my ($path) = @_;
    return $path =~ m{ ([^/]+) [.]vcf \z }x ? $1 : undef;
}

# match_sample($sample_id, $matrix_key)
#   $matrix_key is an array ref of rows as returned by read_matrix_key().
#   Returns the first row whose column-1 value occurs as a substring of
#   $sample_id, or undef when no row matches.
sub match_sample {
    my ($sample_id, $matrix_key) = @_;
    my $row = first { index($sample_id, $_->[1]) != -1 } @{$matrix_key};
    return $row;
}

# find_vcf($matrix_key)
#   File::Find "wanted" helper: inspects $File::Find::name and, when it is a
#   .vcf file, reports which key row its name matches.
#   The key table is passed in explicitly.  A named sub declared inside the
#   per-folder loop would capture only the first folder's table ("variable
#   will not stay shared"), so later folders would be checked against stale
#   data.
#   Dies when a .vcf file name contains none of the sample names.
sub find_vcf {
    my ($matrix_key) = @_;

    my $vcf_path  = $File::Find::name;
    my $sample_id = vcf_sample_id($vcf_path);
    return if !defined $sample_id;

    print "$vcf_path\n";
    print "the short vcf name is: $sample_id\n";

    my $row = match_sample($sample_id, $matrix_key)
        or die "The VCF file doesn't match the Summary Barcode file: $sample_id\n";

    print "$sample_id \t MATCHES $row->[1]\n";
    print "\t$row->[1]_$row->[0]\n\n";
    return;
}

# main()
#   Checks every sub-directory of the results directory.  Dies when the
#   results directory has no sub-directories.
sub main {
    my $current_directory = "/data/results";

    my @dirs = grep { -d } glob "$current_directory/*";
    if (@dirs) {
        print "$current_directory has subfolder(s)\n";
    }
    else {
        print "there are no folders\n";
        die "No run folders found under $current_directory\n";
    }

    for my $dir (@dirs) {
        print "the directory is $dir\n";

        # Last path component, independent of how deep the results dir is.
        my $run_folder = basename($dir);
        print "the folder is $run_folder\n";

        my @matrix_key = read_matrix_key("$dir/matrixkey.txt");
        print Dumper(@matrix_key) . "===\n\n";

        # Anonymous closure: created anew on every iteration, so it always
        # sees the key table of the folder currently being processed.
        find(
            { wanted => sub { find_vcf(\@matrix_key) }, no_chdir => 1 },
            $dir,
        );
    }
    return;
}

# Run when executed as a script; stay side-effect free when loaded by
# another file (e.g. with require).
main() unless caller;

1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement