Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use strict;
- use warnings;
- use Bio::EnsEMBL::Registry;
- #get chromosome# from shell
- my $chr = shift @ARGV;
- my $registry = 'Bio::EnsEMBL::Registry';
- #use local copy of ensembl database
- $registry->load_registry_from_db(
- -host => 'localhost',
- -user => 'user',
- -pass => 'pass'
- );
- my $population_name = 'CSHL-HAPMAP:HapMap-CEU'; #we only want LD in this population
- #lot of adaptor
- my $slice_adaptor = $registry->get_adaptor('human', 'core', 'slice'); #get adaptor for Slice object
- my $population_adaptor = $registry->get_adaptor('human', 'variation', 'population'); #get adaptor for Population object
- my $population = $population_adaptor->fetch_by_name($population_name); #get population object from database
- #write output to this file
- open(my $fo, "| gzip > ${chr}_ld.gz");
- #calcuate chromosome length
- my $slice = $slice_adaptor->fetch_by_region('chromosome', $chr);
- my $chrlen = $slice->length();
- #can't use a whole chromosome at once, partition with overlap
- my @bin = @{bin($chrlen, 10000000, 300000)};
- foreach my $range (@bin){
- my ($start, $end) = @{$range};
- print "Fetching", $chr, ":", $start, "-", $end, "\n";
- my $slice = $slice_adaptor->fetch_by_region('chromosome', $chr, $start, $end);
- print "get adaptor\n";
- my $ldFeatureContainerAdaptor = $registry->get_adaptor('human', 'variation', 'ldfeaturecontainer'); #get adaptor for LDFeatureContainer object
- print "fetching:\n";
- my $ldFeatureContainer = $ldFeatureContainerAdaptor->fetch_by_Slice($slice,$population); #retrieve all LD values in the region
- foreach my $r_square (@{$ldFeatureContainer->get_all_r_square_values}){
- if ($r_square->{r2} > 0.3){ #only LD with 0.3 or more R^2
- print $fo $r_square->{variation1}->variation_name,",", $r_square->{variation2}->variation_name, ",", $r_square->{r2}, "\n";
- }
- }
- }
- #partitioning range
- sub bin {
- my @bin = ();
- my $asize = shift;
- my $bsize = shift;
- my $overlap = shift;
- my $p = 0;
- #check arg
- if ($bsize <= $overlap){
- die;
- }
- while ($p < $asize){
- my $next = $p + $bsize - 1;
- #if it hit the limit, stop right there
- if ($next >= $asize) {
- $next = $asize;
- push(@bin, [$p, $next]);
- last;
- }
- push(@bin, [$p, $next]);
- $p = ($next + 1) - $overlap;
- }
- return \@bin;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement