Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
use strict;
use warnings;

# Scrape a people-directory web page and print one pipe-delimited record per
# person.
#
# The page is rendered to plain text with `lynx -dump`, then a small state
# machine walks the lines and emits seven columns per record:
#
#     name|position|email|phone|location|supervisor|group
#
# Columns 1-3 are named in the script's original notes.  The names of
# columns 4-7 are inferred from the patterns matched below -- TODO confirm
# against the real page.

use constant {
    LYNX => '/usr/bin/lynx',

    # Placeholder: replace with the real directory URL before running.
    DIRECTORY_URL => '[url redacted]',

    # Number of '|' separators that must precede the final (group) column.
    LEADING_COLUMNS => 6,
};

# Return $text with everything up to and including the first ']' removed
# (lynx prefixes links with "[N]").  Text without a ']' is returned unchanged.
sub _strip_link_marker {
    my ($text) = @_;
    return substr($text, index($text, ']') + 1);
}

# Return the '|' separators needed to fill the still-empty columns of a record
# that has emitted $count columns so far, so every row has the same width.
sub _padding {
    my ($count) = @_;
    return $count < LEADING_COLUMNS ? '|' x (LEADING_COLUMNS - $count) : '';
}

# Convert the lines of a lynx text dump into pipe-delimited records.
#
# Parameters: the dump lines (trailing newlines are optional).
# Returns:    the formatted records as one string.  A record is terminated by
#             its group line or by the start of the next record; a record
#             still open at end of input is left without a trailing newline.
#
# $count is the number of columns already emitted for the current record
# (0 means "waiting for the next name line").
sub format_directory_lines {
    my @lines = @_;

    # Presumably building codes that identify a location line -- TODO confirm.
    my $building_re = qr/PAS|TT|EV1|NH/;

    # Keywords that mark the final column.  'Facu?lty' also accepts the
    # misspelling 'Faclty' that earlier versions of this script matched.
    my $group_re = qr/Facu?lty|Graduate|Staff|Emerita/;

    my $out   = '';
    my $count = 0;

    LINE:
    for my $line (@lines) {
        chomp $line;

        # Everything from this marker onwards is ignored.
        last LINE if $line =~ /Waterloo Arts/;

        # A "[N]" link at the very start of the line begins a new record.
        if ($line =~ /^\[/) {
            if ($count == 0) {
                $out .= _strip_link_marker($line) . '|';
                $count = 1;
                next LINE;
            }
            if ($count == 5 || $count == 6) {
                # The previous record had no group line: pad it to full width
                # and close it before starting the new one.
                $out .= _padding($count) . "\n"
                      . _strip_link_marker($line) . '|';
                $count = 1;
                next LINE;
            }
        }

        # Remove leading white space (see http://perlmaven.com/trim).
        $line =~ s/^\s+//;

        if ($line =~ /[Hh]ead shot/) {
            # Image captions carry no data.
        }
        elsif ($count == 1 && $line =~ /\@/) {
            # Email directly after the name: the position is missing, so emit
            # an empty position column to keep the row aligned.
            $out .= '|' . _strip_link_marker($line) . '|';
            $count = 3;
        }
        elsif ($count == 1 && $line =~ /[A-Za-z]/) {
            # Any text after the name that is not an email is the position.
            $out .= "$line|";
            $count = 2;
        }
        elsif ($count == 2 && $line =~ /\@/) {
            $out .= _strip_link_marker($line) . '|';
            $count = 3;
        }
        elsif ($count == 3 && $line =~ /[0-9]/ && $line !~ $building_re) {
            # A line with digits that is not a location: column 4.
            $out .= "$line|";
            $count = 4;
        }
        elsif (($count == 3 || $count == 4) && $line =~ $building_re) {
            # Location; emit an empty column 4 first if it was missing.
            $out .= '|' if $count == 3;
            $out .= "$line|";
            $count = 5;
        }
        elsif ($count == 4 && $line =~ /Location/) {
            # Label line only; the value is handled by the branch above.
        }
        elsif ($line =~ /^[\[*+]|Items of Interest/) {
            # Past this point, links and bullet points are not included.
            # 'Items of Interest' is a hard-coded exception for one entry's
            # blurb, which is also left out.
        }
        elsif ($count == 5 && $line =~ /Supervisor/) {
            $out .= "$line|";
            $count = 6;
        }
        elsif ($count >= 2 && $line =~ $group_re) {
            # NOTE(review): a line that contains 'Emeritus' but none of the
            # group keywords never reaches this branch -- confirm whether
            # 'Emeritus' should be a group keyword as well.
            $line = 'Emeritus' if $line =~ /Emeritus/;
            $out .= _padding($count) . "$line\n";
            $count = 0;
        }
    }

    return $out;
}

# Run lynx on the directory page and return the lines of its text dump.
# Dies if lynx cannot be started; only warns if it exits with an error, so
# whatever output was produced is still processed.
sub fetch_directory_dump {
    # List-form pipe open: the URL is passed as a single argument and never
    # goes through a shell.
    open(my $dump, '-|', LYNX, '-dump', DIRECTORY_URL)
        or die 'Cannot run ' . LYNX . ": $!\n";
    my @lines = <$dump>;
    close($dump)
        or warn LYNX . " did not exit cleanly (status $?)\n";
    return @lines;
}

# Script entry point: fetch the page and print the formatted records.
sub main {
    print format_directory_lines(fetch_directory_dump());
    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main() unless caller;

1;    # true value so the file can also be loaded with require
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement