Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.65 KB | None | 0 0
  1. ###########################################################
  2.  
  3. #   Computer Project #6
  4. #
  5. #   open_file()
  6. #       prompt for input
  7. #       check that input is correct (loop if not)
  8. #       return fp
  9. #
  10. #   read_file(fp)
  11. #       open csv reader
  12. #       for each line in reader
  13. #           if correct data exists
  14. #               create tuple of specific data
  15. #               append to list
  16. #       return list
  17. #
  18. #   remove_duplicate_sites(list)
  19. #       for each tuple in list
  20. #       if site not seen before
  21. #           add it to main & seen list
  22. #       else
  23. #           ignore
  24. #       return list
  25. #
  26. #   top_sites_per_country(list, country)
  27. #       for each tuple in list
  28. #           if country = [4]
  29. #               add to new list
  30. #       return list
  31. #
  32. #   top_sites_per_views(list)
  33. #       sort list by views
  34. #       remove_duplicate_sites(list)
  35. #       sort list by views
  36. #       return list
  37. #
  38. #   main()
  39. #       open_file()
  40. #       generate list by read_file(fp)
  41. #       while true
  42. #           prompt
  43. #           ask user for input
  44. #           if input is 1
  45. #               ask for country
  46. #               for sites in top_sites_per_country()
  47. #                   print with formatting
  48. #           elif input is 2
  49. #               prompt for search
  50. #                   for sites in full_list
  51. #                       if search = full_list[1]
  52. #                           print with formatting
  53. #                   if no sites found
  54. #                       print message
  55. #           elif input is 3
  56. #               for sites in top_sites_per_views()
  57. #                   print with formatting
  58. #           elif input is q
  59. #               exit message
  60. #               kill loop
  61. #           else
  62. #               prompt again
  63.  
  64. ###########################################################
  65.  
  66.  
  67. import csv  # Used for CSV Reader
  68. from operator import itemgetter  # Used for sorting
  69.  
  70. PROMPT = '''
  71. Choose
  72.         (1) Top sites by country
  73.         (2) Search by web site name
  74.         (3) Top sites by views
  75.         (q) Quit
  76.         '''  # Main prompt
  77.  
  78.  
  79. def open_file():
  80.     """
  81.    This function opens a file for reading
  82.    Returns: file pointer
  83.    """
  84.     cont = True
  85.     # loop to ensure we open a file correctly
  86.     while cont:
  87.         try_file = input("Input a filename: ")
  88.         try:  # try/except to ensure file exists and opens correctly
  89.             file_obj = open(try_file, "r", encoding="ISO-8859-1")
  90.             cont = False
  91.         except:
  92.             print("Error: file not found.")
  93.             cont = True
  94.     return file_obj  # return the correct file
  95.  
  96.  
  97. def read_file(fp):
  98.     """
  99.    This function reads file from open_file() and collects interesting data
  100.    Returns: sorted list of tuples
  101.    """
  102.     website_list = []  # functional list
  103.     reader = csv.reader(fp)  # open csv reader
  104.     next(reader, None)  # skip header
  105.     for row in reader:  # iterate for evert tuple
  106.         try:
  107.             row_min = (int(row[0]), row[1], int(row[14].replace(" ", "")),
  108.                        int(row[5].replace(" ", "")),
  109.                        row[30])  # interesting data
  110.             website_list.append(row_min)  # append to main list
  111.         except:
  112.             pass  # ignore rows that fail that test - ex: int(N/A)
  113.     return sorted(website_list, key=itemgetter(0, 4))  # double sorted list
  114.  
  115.  
  116. def remove_duplicate_sites(L_of_L):
  117.     """
  118.    This function iterates through a list, and if a website has been seen before
  119.    it gets ignored and disregarded from output (deduplication)
  120.    Returns: sorted list of tuples
  121.    """
  122.     seen_sites, new_list = [], []  # create seen list and list to be sorted later
  123.     for entry in L_of_L:
  124.         site = entry[1].split('.')  # split website to actual name
  125.         if site[1] in seen_sites:  # check if in list
  126.             pass
  127.         else:
  128.             seen_sites.append(site[1])  # add to list
  129.             new_list.append(entry)  # add to list
  130.     return sorted(new_list, key=itemgetter(0, 1))  # return double sorted list
  131.  
  132.  
  133. def top_sites_per_country(L_of_L, country):
  134.     """
  135.    This function itterates through the list and checks for top sites based on
  136.    user input (country)
  137.    Returns: list of 20 tuples
  138.    """
  139.     new_list = []
  140.     for entry in L_of_L:
  141.         if entry[4] == country:  # check if countries match
  142.             new_list.append(entry)
  143.     new_list = sorted(new_list, key=itemgetter(0))  # single sort
  144.     return new_list[:20]  # return first 20
  145.  
  146.  
  147. def top_sites_per_views(L_of_L):
  148.     """
  149.    This function sorts a list by top views, deduplicated, then sorts again
  150.    Returns: list of 20 tuples
  151.    """
  152.     L_of_L.sort(reverse=True, key=itemgetter(3))  # sort by views
  153.     L_of_L = remove_duplicate_sites(L_of_L)  # deduplicate
  154.     L_of_L.sort(reverse=True, key=itemgetter(3))  # sort by views again
  155.     return L_of_L[:20]  # return first 20
  156.  
  157.  
  158. def main():
  159.     """
  160.    This function asks user for input, then loops through options ( see PROMPT )
  161.    then runs functions based on the user input, while formatting the data in a
  162.    way that is uniform.
  163.    Returns: None
  164.    """
  165.     print("----- Web Data -----")
  166.     file_pointer = open_file()  # open file
  167.     full_list = read_file(file_pointer)  # save list before loop
  168.     cont = True
  169.     while cont:
  170.         print(PROMPT)
  171.         user_input = input("Choice: ")  # user input
  172.         if user_input == "1":
  173.             print("--------- Top 20 by Country -----------")
  174.             country = input("Country: ")
  175.             print("{:30s} {:>15s}{:>25s}".format("Website", "Traffic Rank",
  176.                                                  "Average Daily Page Views"))
  177.             for x in top_sites_per_country(full_list, country):
  178.                 print("{:30s} {:>15d}{:>30,d}".format(x[1], x[2], x[3]))
  179.         elif user_input == "2":
  180.             c = 0  # Add counter to check if none found
  181.             search = input("Search: ").lower()
  182.             print("{:^50s}".format("Websites Matching Query"))
  183.             for x in full_list:
  184.                 if search in x[1]:
  185.                     print("{:^10s}".format(x[1]))
  186.                     c += 1
  187.             if c == 0:
  188.                 print("None found")
  189.         elif user_input == "3":
  190.             print("--------- Top 20 by Page View -----------")
  191.             print("{:30s} {:>15s}".format("Website", "Ave Daily Page Views"))
  192.             for x in top_sites_per_views(full_list):
  193.                 print("{:30s} {:>15,d}".format(x[1], x[3]))
  194.         elif user_input.lower() == "q":
  195.             cont = False
  196.         else:
  197.             print("Incorrect input. Try again.")
  198.  
  199.  
if __name__ == "__main__":  # Run only when executed as a script, not when imported
    main()  # Start the interactive menu
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement