Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
# sat3310 - lab05
# created by USERID@mtu.edu
# DATE
#
# Purpose: download the Fortune 500 website list, send an HTTP HEAD request
# to every listed site, and print how many sites report each web-server
# product (the part of the Server header before any "/version" suffix).
use strict;
use warnings;
use LWP::Simple;

# Variables
my $dataurl  = "http://www.tech.mtu.edu/~toarney/sat3310/lab07/";
my $datafile = "fortune500.tsv";
# The trailing slash matters: the local path is built by plain concatenation,
# so without it the file lands beside the data directory, not inside it.
my $datapath  = "/home/benakapoi/Documents/labs/lab05/data/";
my $localfile = $datapath . $datafile;

# Suffering from buffering: unbuffer STDOUT so the progress line shows up
# while the loop is still running.
$| = 1;

# Main
# Get a file - list of websites
my $status = getstore($dataurl . $datafile, $localfile);
die "Download of $dataurl$datafile failed (HTTP status $status)\n"
    unless is_success($status);

# Parse websites file: tab-separated, one header row, website in column 3.
my @arrayofurls;
open(my $fh, "<", $localfile) or die "Cannot open $localfile: $!\n";
while (my $line = <$fh>) {
    next if $. == 1;                 # skip the header row
    $line =~ s/\r?\n\z//;            # drop the line ending (LF or CRLF)
    my @fields = split(/\t/, $line);
    # Ignore blank or short rows instead of probing the bogus URL "http://".
    next unless defined $fields[2] && length $fields[2];
    push @arrayofurls, $fields[2];
}
close $fh;

# Print size of array
my $totalsize = scalar @arrayofurls;
print "Total number of websites: $totalsize\n";

# Start the loop: probe every site and count the server products.
my %servertypehash;
my $working = 0;
foreach my $website (@arrayofurls) {
    my $completeurl = "http://" . $website;

    # In list context head() returns (content type, length, modified time,
    # expires, server); it returns an empty list when the request fails,
    # which leaves $servertype undefined.
    my (undef, undef, undef, undef, $servertype) = head($completeurl);
    $servertype = "Unknown"
        unless defined $servertype && length $servertype;

    # Keep only the product name: "Apache/2.4.1 (Unix)" -> "Apache".
    # A value with no slash, or with a leading slash, is kept whole.
    my $slash = index($servertype, "/");
    my $servertypesubstring
        = $slash > 0 ? substr($servertype, 0, $slash) : $servertype;
    $servertypehash{$servertypesubstring}++;

    # Progress indicator: print the status, then back the cursor up so the
    # next status overwrites it in place.
    $working++;
    print "Working... $working of $totalsize done.";
    print "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
}
print "\n";

# Print results, least common first; ties are ordered by name so that the
# output is repeatable from run to run.
print "\nResults:\n";
foreach my $server (
    sort { $servertypehash{$a} <=> $servertypehash{$b} or $a cmp $b }
    keys %servertypehash
) {
    print "$servertypehash{$server} \t $server\n";
}
- # vi ~/Documents/labs/lab05/lab05.py
- #!/usr/bin/python
- # sat3310 - lab05
- # created by bcstinch@mtu.edu
- # 3/25/2018
- # Modules
- import urllib2
- import csv
- import collections
- import sys
- # Variables
- dataurl = 'http://www.tech.mtu.edu/~toarney/sat3310/lab07/'
- datafile = 'fortune500.tsv'
- datapath = '/home/benakapoi/Documents/labs/lab05/data/'
- myserverresponses = []
- websitecount = 0
- # Main
- # Get a file - list of websites
- downloadfile = urllib2.urlopen(dataurl + datafile)
- with open(datapath + datafile, 'w') as output:
- output.write(downloadfile.read())
- output.close()
- # Parse websites file
- mywebsitecount = open(datapath + datafile).readlines()
- print "There are", len(mywebsitecount)-1, "websites in", datapath + datafile
- with open(datapath + datafile, 'rt') as inputfile:
- next(inputfile)
- mywebsites = csv.reader(inputfile, delimiter='\t')
- for rank,company,websites in mywebsites:
- request = urllib2.Request('http://' + websites)
- try:
- response = urllib2.urlopen(request,timeout=5)
- if response.info().getheader('Server') == '':
- myserverresponses.append ("Blank")
- else :
- my_server_type = str(response.info().getheader('Server'))
- my_server_type = my_server_type.split('/',1)
- myserverresponses.append (my_server_type[0])
- websitecount +=1
- except IOError, e:
- myserverresponses.append ("Error")
- websitecount +=1
- print "Processing:", websitecount, "of", len(mywebsitecount)-1,
- print "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b",
- # Sort, count, print result
- print "\nResults\n"
- counter = collections.Counter(myserverresponses)
- for servers, freq in counter.most_common():
- print freq, "\t", servers
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement