Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
#
# Print the GRCh37 genomic intervals NCBI Gene records for a versioned
# accession (e.g. NM_000551.3).
#
# Usage:  script.pl ACCESSION.VERSION
#
# Steps:
#   1. esearch the 'gene' database for the accession to obtain gene ids.
#   2. efetch those gene records as XML.
#   3. Locate the 'chromosome' Gene-commentary whose heading starts with
#      'GRCh37'.
#   4. Under it, find the product matching the accession and version and
#      print each Seq-interval as: start <TAB> end <TAB> length.
#
# Exits with a diagnostic on STDERR when the argument is missing or
# malformed, when esearch returns no ids, or when no GRCh37 chromosome
# entry is present in the fetched record.

use strict;
use warnings;
use Data::Dumper;
use XML::LibXML;
use Bio::DB::EUtilities;

# Shared request settings, hoisted so both E-utilities calls agree.
my $email  = 'reecehart@gmail.com';
my $retmax = 25;

my $ac = shift;
die "Usage: $0 ACCESSION.VERSION\n"
    unless defined $ac && length $ac;

# Split the accession into base and version up front. Both parts are
# interpolated into an XPath predicate below, so they must be defined;
# \w+ / \d+ also guarantees they contain no quote characters.
my ($base_ac, $v) = $ac =~ m/\A(\w+)\.(\d+)/;
die "Expected a versioned accession like NM_000551.3, got '$ac'\n"
    unless defined $base_ac && defined $v;

# Step 1: accession -> gene ids.
my $eu = Bio::DB::EUtilities->new(
    -eutil  => 'esearch',
    -db     => 'gene',
    -term   => $ac,
    -email  => $email,
    -retmax => $retmax,
);
my @ids = $eu->get_ids;
die "No gene records found for '$ac'\n" unless @ids;

# Step 2: gene ids -> Entrezgene XML.
$eu = Bio::DB::EUtilities->new(
    -eutil   => 'efetch',
    -db      => 'gene',
    -id      => \@ids,
    -email   => $email,
    -retmode => 'xml',
    -retmax  => $retmax,
);
my $xml = $eu->get_Response()->content();
my $dom = XML::LibXML->load_xml(string => $xml);

# Step 3: find the GRCh37 chromosome entry among the 'chromosome'
# commentaries of the locus section.
my $q1 = join('/',
    qw(/Entrezgene-Set Entrezgene Entrezgene_locus),
    "Gene-commentary[Gene-commentary_label = 'chromosome']",
);
my @grch37_chrs =
    grep { $_->findvalue('Gene-commentary_heading') =~ m/^GRCh37/ }
    $dom->findnodes($q1);

die "No GRCh37 chromosome entry found for '$ac'\n" unless @grch37_chrs;

# Exactly one entry is expected; if several match, say so and keep the
# first rather than silently ignoring the rest.
warn sprintf("%d GRCh37 chromosome entries found for '%s'; using the first\n",
             scalar @grch37_chrs, $ac)
    if @grch37_chrs > 1;
my $grch37_chr = $grch37_chrs[0];

# Step 4: relative path from the chromosome entry down to the
# Seq-interval elements of the product matching accession and version.
my $q2 = join('/',
    'Gene-commentary_products',
    "Gene-commentary[Gene-commentary_accession/text() = '$base_ac' and Gene-commentary_version/text() = '$v']",
    qw(Gene-commentary_genomic-coords Seq-loc Seq-loc_mix
       Seq-loc-mix Seq-loc Seq-loc_int Seq-interval),
);

print("NCBI ($ac)\n");

my @intervals = $grch37_chr->findnodes($q2);
warn "No genomic intervals found for '$ac' on GRCh37\n" unless @intervals;

foreach my $n (@intervals) {
    my $s = $n->findvalue('Seq-interval_from');
    my $e = $n->findvalue('Seq-interval_to');

    # Both ends are shifted by one before printing (the XML positions
    # are presumably zero-based); the third column is the inclusive
    # interval length.
    printf("%d\t%d\t%d\n", $s + 1, $e + 1, $e - $s + 1);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement