Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- use warnings;
- use strict;
- #############################################
- # downloads the lists of journal names and their
- # abbreviations from the Web of Science database
- # and generates a master list containing all
- # of the journals listed.
- #
- # by Alex Chubaty (alex.chubaty@gmail.com)
- #############################################
package jParser;
use base 'HTML::Parser';

# HTML::Parser subclass that collects every piece of text found inside
# <dl> ... </dl> elements.  The collected fragments accumulate in the
# file-scoped @contents list, which the main script reads after all pages
# have been parsed.  State is shared by every jParser instance in this file.

# Text fragments gathered so far, in document order.
my @contents;

# True while the parser is between a <dl> start tag and its end tag.
my $flag = 0;

# Start-tag handler: begin collecting when a <dl> element opens.
#   $self - the parser object (unused)
#   $tag  - lower-cased tag name supplied by HTML::Parser
sub start {
    my ($self, $tag) = @_;
    $flag = 1 if $tag eq "dl";
    return;
}

# Text handler: while inside a <dl>, normalise ampersands and store the text.
#   $self - the parser object (unused)
#   $text - raw text chunk; with method-style handlers HTML::Parser passes
#           the source text with entities still encoded
sub text {
    my ($self, $text) = @_;
    return unless $flag;

    # Decode the ampersand entity first so it is not escaped twice below.
    $text =~ s/&amp;/&/g;
    # Backslash-escape every ampersand (presumably for a LaTeX/BibTeX
    # consumer of the list -- confirm).
    $text =~ s/&/\\&/g;

    push @contents, $text;
    return;
}

# End-tag handler: stop collecting when the <dl> element closes.
#   $self - the parser object (unused)
#   $tag  - lower-cased tag name supplied by HTML::Parser
sub end {
    my ($self, $tag) = @_;
    $flag = 0 if $tag eq "dl";
    return;
}
package main;
use LWP::Simple;

# Build the list of abbreviation pages: one index page named "0-9" plus
# one page per letter A through Z, all under the same base URL.
my $base_url = "http://images.webofknowledge.com/WOK46/help/WOS/";
my @urls     = map { $base_url . $_ . "_abrvjt.html" } "0-9", "A" .. "Z";

# Download each page and feed it through a fresh jParser; the parser's
# handlers accumulate the <dl> text in the shared @contents list.
for my $url (@urls) {
    my $html = get($url);
    # LWP::Simple::get returns undef on any failure; name the URL so the
    # failing page can be identified.
    die "$0: get failed for $url" unless defined $html;

    my $parser = jParser->new;
    $parser->parse($html);
    # Signal end of document so any text still buffered by the parser
    # is delivered to the handlers.
    $parser->eof;
}
# Assemble the collected text in memory.  Concatenating the fragments and
# appending one newline yields the same text the script previously wrote
# to and re-read from a scratch file (output.txt), without touching any
# file of that name in the working directory.
my $master_list = join('', @contents) . "\n";

# Tidy the layout of the collected text.
$master_list =~ s/\n\n/\n/g;    # collapse pairs of newlines into one
$master_list =~ s/\n\t/\t/g;    # pull a tab-led line up onto the previous line
# Removes the newline before the first occurrence of the literal text '92.
# NOTE(review): this pattern looks like an RTF-escaped apostrophe artifact;
# kept exactly as found -- confirm the intended pattern.
$master_list =~ s/\n\'92/\'92/;

# Custom journal entries appended after the downloaded list, in the
# "FULL TITLE<tab>ABBREVIATION" form.
my @custom_entries = (
    "THE AMERICAN NATURALIST\tAM NAT\n",
    "THE CANADIAN ENTOMOLOGIST\tCAN ENTOMOL\n",
    "TRENDS IN ECOLOGY AND EVOLUTION\tTRENDS ECOL EVOL\n",
);

my $master_file = "masterlist.txt";
open(my $out, '>', $master_file)
    or die "$0: cannot open $master_file for writing: $!";
print {$out} $master_list, @custom_entries
    or die "$0: cannot write to $master_file: $!";
# Buffered write errors only surface at close, so check it too.
close($out)
    or die "$0: cannot close $master_file: $!";

exit;
Add Comment
Please, Sign In to add comment