amc

download list of journal names and their abbreviations

amc
Feb 12th, 2012
469
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.82 KB | None | 0 0
  1. #!/usr/bin/perl
  2. use warnings;
  3. use strict;
  4.  
  5. #############################################
  6. # downloads the lists of journal names and their
  7. # abbreviations from the Web of Science database
  8. # and generates a master list containing all
  9. # of the journals listed.
  10. #
  11. # by Alex Chubaty (alex.chubaty@gmail.com)
  12. #############################################
  # jParser: an HTML::Parser subclass that collects every piece of text found
  # inside <dl> elements. It relies on the start/text/end callback methods that
  # HTML::Parser invokes on subclasses constructed without arguments.
  package jParser;
  use base 'HTML::Parser';

  # Text fragments collected from inside <dl> elements, in document order.
  # Deliberately a file-scoped lexical: the main script later in this file
  # reads it directly once all pages have been parsed. Starts out empty
  # (previously it was seeded with a spurious empty string).
  my @contents;

  # True while the parser is between a <dl> start tag and a </dl> end tag.
  # This is a simple on/off switch, so nested <dl> elements are not tracked.
  my $flag = 0;

  # start($self, $tag, $attr, $attrseq, $origtext)
  # Start-tag callback: switch collection on when a <dl> opens.
  sub start {
      my ($self, $tag, $attr, $attrseq, $origtext) = @_;
      $flag = 1 if $tag eq 'dl';
      return;
  }

  # text($self, $text)
  # Text callback: while inside a <dl>, normalise ampersands and store the text.
  sub text {
      my ($self, $text) = @_;
      return unless $flag;

      # The text arrives as it appears in the HTML source, so an ampersand is
      # still spelled "&amp;". Decode it first; otherwise the escaping step
      # below would produce "\&amp;". Only this one entity is handled.
      $text =~ s/&amp;/&/g;

      # Prefix every ampersand with a backslash.
      # NOTE(review): presumably for LaTeX/BibTeX consumers -- confirm.
      $text =~ s/&/\\&/g;

      push @contents, $text;
      return;
  }

  # end($self, $tag, $origtext)
  # End-tag callback: switch collection off when a <dl> closes.
  sub end {
      my ($self, $tag, $origtext) = @_;
      $flag = 0 if $tag eq 'dl';
      return;
  }
  40.  
  41.  
  42.  
  package main;
  use LWP::Simple;

  # Main script: download each Web of Science journal-abbreviation page, let
  # jParser collect the text of its <dl> lists into @contents, tidy the combined
  # text and write it, plus a few hand-maintained entries, to masterlist.txt.

  # Common prefix of all the abbreviation pages.
  my $base_url = "http://images.webofknowledge.com/WOK46/help/WOS/";

  # One page for the "0-9" group, followed by one page per letter A-Z.
  my @urls = map { $base_url . $_ . "_abrvjt.html" } ("0-9", "A" .. "Z");

  foreach my $url (@urls) {
      my $html = get($url);
      die "$0: get failed for $url" unless defined $html;

      my $parser = jParser->new;
      $parser->parse($html);
      # Signal end of document so the parser flushes any buffered text.
      $parser->eof;
  }

  # Concatenate everything that was collected. This is done in memory; no
  # intermediate output.txt is written, re-read and deleted any more, which
  # also avoids changing the global input record separator.
  my $textfile = join('', @contents) . "\n";

  # Replace each pair of consecutive newlines with a single newline.
  $textfile =~ s/\n\n/\n/g;
  # Pull a line that starts with a tab up onto the end of the previous line.
  $textfile =~ s/\n\t/\t/g;
  # NOTE(review): matches a newline followed by the literal characters '92
  # (first occurrence only). This looks like a mangled apostrophe escape --
  # confirm the intended pattern before changing it.
  $textfile =~ s/\n\'92/\'92/;

  my $master_file = "masterlist.txt";
  open(my $out, '>', $master_file)
      or die "$0: cannot open $master_file for writing: $!";
  print {$out} $textfile;

  # add custom journal entries
  print {$out} "THE AMERICAN NATURALIST\tAM NAT\n";
  print {$out} "THE CANADIAN ENTOMOLOGIST\tCAN ENTOMOL\n";
  print {$out} "TRENDS IN ECOLOGY AND EVOLUTION\tTRENDS ECOL EVOL\n";

  # Buffered write errors only surface at close, so check it.
  close($out) or die "$0: error closing $master_file: $!";

  exit;
Add Comment
Please, Sign In to add comment