Untitled

#!/usr/bin/perl

# sat3310 - lab05
# created by USERID@mtu.edu
# DATE

use LWP::Simple;


# Strictures apply to everything below: undeclared variables and uses of
# undef become compile-time errors / run-time warnings instead of silent bugs.
use strict;
use warnings;

# Variables
my $dataurl  = "http://www.tech.mtu.edu/~toarney/sat3310/lab07/";
my $datafile = "fortune500.tsv";
# The directory is joined to the file name by plain concatenation, so it
# must end with a slash; without it the file lands next to the data
# directory as ".../lab05/datafortune500.tsv".
my $datapath  = "/home/benakapoi/Documents/labs/lab05/data/";
my $localcopy = $datapath . $datafile;

# Unbuffer STDOUT before any output so the progress line appears while
# the (slow) lookups are still running.
$| = 1;

# Main

# Get a file - list of websites
# getstore() returns the HTTP status code of the transfer.
my $status = getstore($dataurl . $datafile, $localcopy);
is_success($status)
    or die "Could not download $dataurl$datafile to $localcopy (HTTP status $status)\n";

# Parse websites file
# Each row is tab-separated; the third column is used as the site's host name.
open(my $fh, "<", $localcopy)
    or die "Cannot open $localcopy: $!\n";

my $header = <$fh>;    # first line is skipped: it is not a website row

my @arrayofurls;
while (my $line = <$fh>) {
    $line =~ s/\r?\n\z//;    # drop the line ending (LF or CRLF)
    my @fields = split(/\t/, $line);

    # Skip blank or short rows instead of queuing an undefined URL.
    next unless defined($fields[2]) && length($fields[2]);
    push @arrayofurls, $fields[2];
}
close $fh;

# Print size of array
my $totalsize = scalar @arrayofurls;
print "Total number of websites: $totalsize\n";

# Ask every site for its headers and tally the server software name
my %servertypehash;
my $working = 0;
foreach my $website (@arrayofurls) {
    my $completeurl = "http://" . $website;

    # In list context head() returns (content type, length, modified time,
    # expires, server) and an empty list on failure; only the server
    # string is needed here.
    my (undef, undef, undef, undef, $servertype) = head($completeurl);
    $servertype = "Unknown"
        unless defined($servertype) && length($servertype);

    # Keep only the text before the first "/" ("Apache/2.4" -> "Apache").
    # A string with no slash, or a leading slash, is kept whole.
    my $slash = index($servertype, "/");
    my $servertypesubstring
        = $slash > 0 ? substr($servertype, 0, $slash) : $servertype;
    $servertypehash{$servertypesubstring}++;

    $working++;
    # "\r" returns to the start of the line so each update overwrites the
    # previous one, whatever its length.
    print "\rWorking... $working of $totalsize done.";
}
print "\n";

# Print results
# Ascending by count; names break ties so the output order is repeatable.
my @ranked = sort {
    $servertypehash{$a} <=> $servertypehash{$b} or $a cmp $b
} keys %servertypehash;

print "\nResults:\n";
foreach my $server (@ranked) {
    print "$servertypehash{$server} \t $server\n";
}

# vi ~/Documents/labs/lab05/lab05.py

#!/usr/bin/python

# sat3310 - lab05
# created by bcstinch@mtu.edu
# 3/25/2018

# Modules

import collections
import csv
import sys

try:                                    # Python 2 module names
    import urllib2
    import httplib
except ImportError:                     # Python 3 equivalents, same names
    import urllib.request as urllib2
    import http.client as httplib

# Variables

dataurl = 'http://www.tech.mtu.edu/~toarney/sat3310/lab07/'
datafile = 'fortune500.tsv'
datapath = '/home/benakapoi/Documents/labs/lab05/data/'


def download_list(url, destination):
    """Fetch ``url`` and save the raw response body to ``destination``.

    The connection is closed even if reading fails, and the file is written
    in binary mode so the bytes are stored exactly as received.
    """
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()
    with open(destination, 'wb') as output:
        output.write(payload)


def read_websites(path):
    """Return the third column of every data row of the TSV file at ``path``.

    The first row is skipped (not treated as a website). Blank lines and
    rows with fewer than three columns are ignored rather than raising
    ``ValueError`` during tuple unpacking.
    """
    websites = []
    with open(path, 'r') as inputfile:
        rows = csv.reader(inputfile, delimiter='\t')
        next(rows, None)    # default avoids StopIteration on an empty file
        for row in rows:
            if len(row) < 3:
                continue
            website = row[2].strip()
            if website:
                websites.append(website)
    return websites


def server_family(server_header):
    """Reduce a ``Server`` header value to the text before the first ``/``.

    Returns ``"Blank"`` when the header is missing (``None``) or empty, so an
    absent header is not tallied as the literal string ``"None"``.
    """
    if not server_header:
        return "Blank"
    return str(server_header).split('/', 1)[0]


def probe_server(website, timeout=5):
    """Request ``http://<website>`` and return its server family.

    Returns ``"Error"`` when the request fails. Protocol errors and
    malformed URLs are caught as well as I/O errors, so one bad site does not
    abort the whole run.
    """
    request = urllib2.Request('http://' + website)
    try:
        response = urllib2.urlopen(request, timeout=timeout)
    except (IOError, ValueError, httplib.HTTPException):
        return "Error"
    try:
        return server_family(response.info().get('Server'))
    finally:
        response.close()


def main():
    """Download the site list, probe every site, and print a frequency table."""
    localcopy = datapath + datafile

    # Get a file - list of websites
    download_list(dataurl + datafile, localcopy)

    # Parse websites file
    websites = read_websites(localcopy)
    total = len(websites)
    print("There are %d websites in %s" % (total, localcopy))

    myserverresponses = []
    for websitecount, website in enumerate(websites, 1):
        myserverresponses.append(probe_server(website))
        # "\r" rewinds to the start of the line so each update overwrites the
        # last one; flush because no newline is written.
        sys.stdout.write("\rProcessing: %d of %d" % (websitecount, total))
        sys.stdout.flush()

    # Sort, count, print result
    print("\nResults\n")
    counter = collections.Counter(myserverresponses)
    for servers, freq in counter.most_common():
        print("%s \t %s" % (freq, servers))


if __name__ == '__main__':
    main()