Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ###########################################################
- # Computer Project #6
- #
- # open_file()
- # prompt for input
- # check that input is correct (loop if not)
- # return fp
- #
- # read_file(fp)
- # open csv reader
- # for each line in reader
- # if correct data exists
- # create tuple of specific data
- # append to list
- # return list
- #
- # remove_duplicate_sites(list)
- # for each tuple in list
- # if site not seen before
- # add it to main & seen list
- # else
- # ignore
- # return list
- #
- # top_sites_per_country(list, country)
- # for each tuple in list
- # if country = [4]
- # add to new list
- # return list
- #
- # top_sites_per_views(list)
- # sort list by views
- # remove_duplicate_sites(list)
- # sort list by views
- # return list
- #
- # main()
- # open_file()
- # generate list by read_file(fp)
- # while true
- # prompt
- # ask user for input
- # if input is 1
- # ask for country
- # for sites in top_sites_per_country()
- # print with formatting
- # elif input is 2
- # prompt for search
- # for sites in full_list
- # if search = full_list[1]
- # print with formatting
- # if no sites found
- # print message
- # elif input is 3
- # for sites in top_sites_per_views()
- # print with formatting
- # elif input is q
- # exit message
- # kill loop
- # else
- # prompt again
- ###########################################################
- import csv # Used for CSV Reader
- from operator import itemgetter # Used for sorting
# Menu text printed by main() before every "Choice: " prompt.
# NOTE(review): the pasted source lost its indentation, so any leading
# spaces the original literal had on these lines cannot be recovered.
PROMPT = '''
Choose
(1) Top sites by country
(2) Search by web site name
(3) Top sites by views
(q) Quit
'''
def open_file():
    """
    This function prompts for a filename until a file can be opened.

    The file is opened for reading in text mode with ISO-8859-1 encoding.
    When the open fails, "Error: file not found." is printed and the user
    is asked again.
    Returns: file pointer (the caller is responsible for closing it)
    """
    while True:
        try_file = input("Input a filename: ")
        try:
            return open(try_file, "r", encoding="ISO-8859-1")
        except (OSError, ValueError):
            # OSError: missing file, directory, permission problem, ...
            # ValueError: filename containing an embedded null character.
            # Anything else (e.g. Ctrl-C) propagates instead of being
            # swallowed and turned into an endless re-prompt.
            print("Error: file not found.")
def read_file(fp):
    """
    This function reads CSV data and collects the interesting columns of
    every usable row.

    The first line is treated as a header and skipped. Each remaining row
    becomes the tuple
        (int(col 0), col 1, int(col 14), int(col 5), col 30)
    with spaces removed from columns 14 and 5 before conversion. Rows that
    are too short, or whose numeric columns do not convert (ex: "N/A"),
    are skipped.
    fp: open file pointer (any iterable of CSV lines works)
    Returns: list of tuples sorted by element 0, then element 4
    """
    website_list = []
    reader = csv.reader(fp)
    next(reader, None)  # skip header; the default avoids StopIteration on empty input
    for row in reader:
        try:
            row_min = (
                int(row[0]),
                row[1],
                int(row[14].replace(" ", "")),
                int(row[5].replace(" ", "")),
                row[30],
            )
        except (ValueError, IndexError):
            # ValueError: non-numeric field; IndexError: row too short.
            # Only these expected data problems are ignored, so genuine
            # bugs or interrupts are no longer silently swallowed.
            continue
        website_list.append(row_min)
    return sorted(website_list, key=itemgetter(0, 4))
def remove_duplicate_sites(L_of_L):
    """
    This function keeps only the first entry seen for each website name
    (deduplication).

    The website name is the second dot-separated part of element 1
    (ex: "google" in "www.google.com"). An address without any dot is
    keyed by the whole string instead of raising IndexError.
    L_of_L: iterable of tuples whose element 1 is the site address
    Returns: new list of tuples sorted by element 0, then element 1;
             the input is not modified
    """
    seen_sites = set()  # set gives O(1) membership tests
    new_list = []
    for entry in L_of_L:
        parts = entry[1].split('.')
        site = parts[1] if len(parts) > 1 else parts[0]
        if site in seen_sites:
            continue  # later duplicates are dropped
        seen_sites.add(site)
        new_list.append(entry)
    return sorted(new_list, key=itemgetter(0, 1))
def top_sites_per_country(L_of_L, country, limit=20):
    """
    This function iterates through the list and collects the entries whose
    element 4 equals the requested country, ordered by element 0.

    L_of_L: iterable of tuples (element 0 is the sort key, element 4 the
            country)
    country: value compared for exact equality with element 4
    limit: maximum number of entries returned (default 20; None returns
           every match)
    Returns: list of at most `limit` tuples; the input is not modified
    """
    matches = [entry for entry in L_of_L if entry[4] == country]
    matches.sort(key=itemgetter(0))  # stable: ties keep their input order
    return matches[:limit]
def top_sites_per_views(L_of_L, limit=20):
    """
    This function sorts the entries by element 3 (views) in descending
    order, deduplicates them with remove_duplicate_sites(), then sorts by
    views again.

    Sorting first means the entry kept for each site is the one with the
    most views. The second sort is needed because remove_duplicate_sites()
    returns its result re-sorted by other elements.
    L_of_L: list of tuples; it is NOT modified (a sorted copy is used), so
            the caller's ordering survives the call
    limit: maximum number of entries returned (default 20)
    Returns: list of at most `limit` tuples
    """
    by_views = sorted(L_of_L, key=itemgetter(3), reverse=True)
    unique = remove_duplicate_sites(by_views)
    unique.sort(key=itemgetter(3), reverse=True)
    return unique[:limit]
def main():
    """
    This function asks the user for a data file, loads it, then loops over
    the menu options ( see PROMPT ) and prints the requested data with
    uniform formatting until the user enters "q".
    Returns: None
    """
    print("----- Web Data -----")
    # All rows are loaded into memory up front, so the file can be closed
    # as soon as reading finishes.
    with open_file() as file_pointer:
        full_list = read_file(file_pointer)

    while True:
        print(PROMPT)
        user_input = input("Choice: ")
        if user_input == "1":
            print("--------- Top 20 by Country -----------")
            country = input("Country: ")
            print("{:30s} {:>15s}{:>25s}".format("Website", "Traffic Rank",
                                                 "Average Daily Page Views"))
            for x in top_sites_per_country(full_list, country):
                print("{:30s} {:>15d}{:>30,d}".format(x[1], x[2], x[3]))
        elif user_input == "2":
            search = input("Search: ").lower()
            print("{:^50s}".format("Websites Matching Query"))
            found = False  # tracks whether anything matched
            for x in full_list:
                # The query is lower-cased, so the site name must be too;
                # otherwise names with capitals could never match.
                if search in x[1].lower():
                    print("{:^10s}".format(x[1]))
                    found = True
            if not found:
                print("None found")
        elif user_input == "3":
            print("--------- Top 20 by Page View -----------")
            print("{:30s} {:>15s}".format("Website", "Ave Daily Page Views"))
            for x in top_sites_per_views(full_list):
                print("{:30s} {:>15,d}".format(x[1], x[3]))
        elif user_input.lower() == "q":
            break
        else:
            print("Incorrect input. Try again.")
if __name__ == "__main__":  # run only when executed as a script, not on import
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement