SHARE
TWEET

Untitled

a guest Oct 21st, 2019 94 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ###########################################################
  2.  
  3. #   Computer Project #6
  4. #
  5. #   open_file()
  6. #       prompt for input
  7. #       check that input is correct (loop if not)
  8. #       return fp
  9. #
  10. #   read_file(fp)
  11. #       open csv reader
  12. #       for each line in reader
  13. #           if correct data exists
  14. #               create tuple of specific data
  15. #               append to list
  16. #       return list
  17. #
  18. #   remove_duplicate_sites(list)
  19. #       for each tuple in list
  20. #       if site not seen before
  21. #           add it to main & seen list
  22. #       else
  23. #           ignore
  24. #       return list
  25. #
  26. #   top_sites_per_country(list, country)
  27. #       for each tuple in list
  28. #           if country = [4]
  29. #               add to new list
  30. #       return list
  31. #
  32. #   top_sites_per_views(list)
  33. #       sort list by views
  34. #       remove_duplicate_sites(list)
  35. #       sort list by views
  36. #       return list
  37. #
  38. #   main()
  39. #       open_file()
  40. #       generate list by read_file(fp)
  41. #       while true
  42. #           prompt
  43. #           ask user for input
  44. #           if input is 1
  45. #               ask for country
  46. #               for sites in top_sites_per_country()
  47. #                   print with formatting
  48. #           elif input is 2
  49. #               prompt for search
  50. #                   for sites in full_list
  51. #                       if search = full_list[1]
  52. #                           print with formatting
  53. #                   if no sites found
  54. #                       print message
  55. #           elif input is 3
  56. #               for sites in top_sites_per_views()
  57. #                   print with formatting
  58. #           elif input is q
  59. #               exit message
  60. #               kill loop
  61. #           else
  62. #               prompt again
  63.  
  64. ###########################################################
  65.  
  66.  
  67. import csv  # Used for CSV Reader
  68. from operator import itemgetter  # Used for sorting
  69.  
# Menu text that main() prints at the start of every loop iteration.
# The option keys listed here ("1", "2", "3", "q") are the values main()
# compares the user's choice against.
PROMPT = '''
Choose
        (1) Top sites by country
        (2) Search by web site name
        (3) Top sites by views
        (q) Quit
        '''  # Main prompt
  77.  
  78.  
  79. def open_file():
  80.     """
  81.    This function opens a file for reading
  82.    Returns: file pointer
  83.    """
  84.     cont = True
  85.     # loop to ensure we open a file correctly
  86.     while cont:
  87.         try_file = input("Input a filename: ")
  88.         try:  # try/except to ensure file exists and opens correctly
  89.             file_obj = open(try_file, "r", encoding="ISO-8859-1")
  90.             cont = False
  91.         except:
  92.             print("Error: file not found.")
  93.             cont = True
  94.     return file_obj  # return the correct file
  95.  
  96.  
  97. def read_file(fp):
  98.     """
  99.    This function reads file from open_file() and collects interesting data
  100.    Returns: sorted list of tuples
  101.    """
  102.     website_list = []  # functional list
  103.     reader = csv.reader(fp)  # open csv reader
  104.     next(reader, None)  # skip header
  105.     for row in reader:  # iterate for evert tuple
  106.         try:
  107.             row_min = (int(row[0]), row[1], int(row[14].replace(" ", "")),
  108.                        int(row[5].replace(" ", "")),
  109.                        row[30])  # interesting data
  110.             website_list.append(row_min)  # append to main list
  111.         except:
  112.             pass  # ignore rows that fail that test - ex: int(N/A)
  113.     return sorted(website_list, key=itemgetter(0, 4))  # double sorted list
  114.  
  115.  
  116. def remove_duplicate_sites(L_of_L):
  117.     """
  118.    This function iterates through a list, and if a website has been seen before
  119.    it gets ignored and disregarded from output (deduplication)
  120.    Returns: sorted list of tuples
  121.    """
  122.     seen_sites, new_list = [], []  # create seen list and list to be sorted later
  123.     for entry in L_of_L:
  124.         site = entry[1].split('.')  # split website to actual name
  125.         if site[1] in seen_sites:  # check if in list
  126.             pass
  127.         else:
  128.             seen_sites.append(site[1])  # add to list
  129.             new_list.append(entry)  # add to list
  130.     return sorted(new_list, key=itemgetter(0, 1))  # return double sorted list
  131.  
  132.  
  133. def top_sites_per_country(L_of_L, country):
  134.     """
  135.    This function itterates through the list and checks for top sites based on
  136.    user input (country)
  137.    Returns: list of 20 tuples
  138.    """
  139.     new_list = []
  140.     for entry in L_of_L:
  141.         if entry[4] == country:  # check if countries match
  142.             new_list.append(entry)
  143.     new_list = sorted(new_list, key=itemgetter(0))  # single sort
  144.     return new_list[:20]  # return first 20
  145.  
  146.  
  147. def top_sites_per_views(L_of_L):
  148.     """
  149.    This function sorts a list by top views, deduplicated, then sorts again
  150.    Returns: list of 20 tuples
  151.    """
  152.     L_of_L.sort(reverse=True, key=itemgetter(3))  # sort by views
  153.     L_of_L = remove_duplicate_sites(L_of_L)  # deduplicate
  154.     L_of_L.sort(reverse=True, key=itemgetter(3))  # sort by views again
  155.     return L_of_L[:20]  # return first 20
  156.  
  157.  
  158. def main():
  159.     """
  160.    This function asks user for input, then loops through options ( see PROMPT )
  161.    then runs functions based on the user input, while formatting the data in a
  162.    way that is uniform.
  163.    Returns: None
  164.    """
  165.     print("----- Web Data -----")
  166.     file_pointer = open_file()  # open file
  167.     full_list = read_file(file_pointer)  # save list before loop
  168.     cont = True
  169.     while cont:
  170.         print(PROMPT)
  171.         user_input = input("Choice: ")  # user input
  172.         if user_input == "1":
  173.             print("--------- Top 20 by Country -----------")
  174.             country = input("Country: ")
  175.             print("{:30s} {:>15s}{:>25s}".format("Website", "Traffic Rank",
  176.                                                  "Average Daily Page Views"))
  177.             for x in top_sites_per_country(full_list, country):
  178.                 print("{:30s} {:>15d}{:>30,d}".format(x[1], x[2], x[3]))
  179.         elif user_input == "2":
  180.             c = 0  # Add counter to check if none found
  181.             search = input("Search: ").lower()
  182.             print("{:^50s}".format("Websites Matching Query"))
  183.             for x in full_list:
  184.                 if search in x[1]:
  185.                     print("{:^10s}".format(x[1]))
  186.                     c += 1
  187.             if c == 0:
  188.                 print("None found")
  189.         elif user_input == "3":
  190.             print("--------- Top 20 by Page View -----------")
  191.             print("{:30s} {:>15s}".format("Website", "Ave Daily Page Views"))
  192.             for x in top_sites_per_views(full_list):
  193.                 print("{:30s} {:>15,d}".format(x[1], x[3]))
  194.         elif user_input.lower() == "q":
  195.             cont = False
  196.         else:
  197.             print("Incorrect input. Try again.")
  198.  
  199.  
# Run the program only when this file is executed directly as a script;
# importing it as a module defines the functions without starting the menu.
if __name__ == "__main__":  # script entry-point guard
    main()  # start the interactive menu
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top