Advertisement
Guest User

Untitled

a guest
Aug 15th, 2017
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.30 KB | None | 0 0
  1. use strict;
  2. use warnings;
  3. use Bio::EnsEMBL::Registry;
  4.  
  5.  
  6. #get chromosome# from shell
  7. my $chr = shift @ARGV;
  8. my $registry = 'Bio::EnsEMBL::Registry';
  9.  
  10. #use local copy of ensembl database
  11. $registry->load_registry_from_db(
  12.     -host => 'localhost',
  13.     -user => 'user',
  14.     -pass => 'pass'
  15. );
  16.  
  17. my $population_name = 'CSHL-HAPMAP:HapMap-CEU'; #we only want LD in this population
  18.  
  19. #lot of adaptor
  20. my $slice_adaptor = $registry->get_adaptor('human', 'core', 'slice'); #get adaptor for Slice object
  21. my $population_adaptor = $registry->get_adaptor('human', 'variation', 'population'); #get adaptor for Population object
  22.  
  23. my $population = $population_adaptor->fetch_by_name($population_name); #get population object from database
  24.  
  25. #write output to this file
  26. open(my $fo, "| gzip > ${chr}_ld.gz");
  27.  
  28. #calcuate chromosome length
  29. my $slice = $slice_adaptor->fetch_by_region('chromosome', $chr);
  30. my $chrlen =  $slice->length();
  31.  
  32. #can't use a whole chromosome at once, partition with overlap
  33. my @bin = @{bin($chrlen, 10000000, 300000)};
  34.  
  35. foreach my $range (@bin){
  36.   my ($start, $end) =  @{$range};
  37.   print "Fetching", $chr, ":", $start, "-", $end, "\n";
  38.   my $slice = $slice_adaptor->fetch_by_region('chromosome', $chr, $start, $end);
  39.   print "get adaptor\n";
  40.   my $ldFeatureContainerAdaptor = $registry->get_adaptor('human', 'variation', 'ldfeaturecontainer'); #get adaptor for LDFeatureContainer object
  41.   print "fetching:\n";
  42.   my $ldFeatureContainer = $ldFeatureContainerAdaptor->fetch_by_Slice($slice,$population); #retrieve all LD values in the region
  43.  
  44.   foreach my $r_square (@{$ldFeatureContainer->get_all_r_square_values}){
  45.     if ($r_square->{r2} > 0.3){ #only LD with 0.3 or more R^2
  46.       print $fo $r_square->{variation1}->variation_name,",", $r_square->{variation2}->variation_name, ",", $r_square->{r2}, "\n";
  47.     }
  48.   }
  49. }
  50.  
  51.  
  52. #partitioning range
  53. sub bin {
  54.   my @bin = ();
  55.   my $asize = shift;
  56.   my $bsize = shift;
  57.   my $overlap = shift;
  58.   my $p = 0;
  59.   #check arg
  60.   if ($bsize <= $overlap){
  61.     die;
  62.   }
  63.  
  64.   while ($p < $asize){
  65.      
  66.     my $next = $p + $bsize - 1;
  67.     #if it hit the limit, stop right there
  68.     if ($next >= $asize) {
  69.       $next = $asize;
  70.       push(@bin, [$p, $next]);
  71.       last;
  72.     }
  73.     push(@bin, [$p, $next]);
  74.     $p = ($next + 1) - $overlap;
  75.   }
  76.   return \@bin;
  77. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement