Advertisement
reece

fetching exons with ncbi eutilities

May 21st, 2011
200
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2.  
  3. use strict;
  4. use warnings;
  5.  
  6. use Data::Dumper;
  7. use XML::LibXML;
  8.  
  9. my $ac = shift;
  10.  
  11. use Bio::DB::EUtilities;
  12. my $eu;
  13. $eu = Bio::DB::EUtilities->new(-eutil  => 'esearch',
  14.                                -db     => 'gene',
  15.                                -term   => $ac,
  16.                                -email  => 'reecehart@gmail.com',
  17.                                -retmax => 25);
  18. my @ids = $eu->get_ids;
  19. $eu = Bio::DB::EUtilities->new(-eutil  => 'efetch',
  20.                                -db     => 'gene',
  21.                                -id     => \@ids,
  22.                                -email  => 'reecehart@gmail.com',
  23.                                -retmode => 'xml',
  24.                                -retmax => 25);
  25. my $xml = $eu->get_Response()->content();
  26. my $dom = XML::LibXML->load_xml(string => $xml);
  27.  
  28. # First find the GRCh37 chromosome entry
  29. # There should be only one, but I don't test for this
  30. my $q1 = join('/',
  31.               qw(/Entrezgene-Set Entrezgene Entrezgene_locus ),
  32.               "Gene-commentary[Gene-commentary_label = 'chromosome']"
  33.              );
  34. my ($grch37_chr) = grep
  35.   { $_->findvalue('Gene-commentary_heading') =~ m/^GRCh37/ }
  36.   $dom->findnodes($q1);
  37.  
  38. my ($base_ac,$v) = $ac =~ m/(^\w+)\.(\d+)/;
  39. my $q2 = join('/',
  40.               'Gene-commentary_products',
  41.               "Gene-commentary[Gene-commentary_accession/text() = '$base_ac' and Gene-commentary_version/text() = '$v']",
  42.               qw(Gene-commentary_genomic-coords Seq-loc Seq-loc_mix
  43.                    Seq-loc-mix Seq-loc Seq-loc_int Seq-interval
  44.                   )
  45.              );
  46.  
  47. print("NCBI ($ac)\n");
  48. foreach my $n ($grch37_chr->findnodes($q2)) {
  49.   my ($s) = $n->findvalue('Seq-interval_from');
  50.   my ($e) = $n->findvalue('Seq-interval_to');
  51.   printf("%d\t%d\t%d\n", $s+1, $e+1, $e-$s+1);
  52. }
Advertisement
Advertisement
Advertisement
RAW Paste Data Copied
Advertisement