Advertisement
themaleem

Statistics module

Apr 15th, 2023 (edited)
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 16.19 KB | None | 0 0
  1. # STATISTICS  MODULE
  2.  
  3.  
  4. from dataset import load_users_transactions
  5.  
  6. trans = load_users_transactions()
  7.  
  8.  
  9. # reusable median function
  10. def median(array):
  11.     # this function returns the median of a list passed to it
  12.  
  13.     # sort and count the amounts array
  14.     array.sort()
  15.     length = len(array)
  16.     # if the length is an even number
  17.     # then divide the sum of medians
  18.     if length % 2 == 0:
  19.         return (array[length // 2] + array[(length // 2) - 1]) / 2
  20.     else:
  21.         return array[(length // 2)]
  22.  
  23.  
  24. # reusable interquatile function
  25. def interquatile_range(array):
  26.     # this function returns the interquartile of a list passed to it
  27.     array.sort()
  28.     # if the length of the list is odd
  29.     # split the list into two by the index of its median
  30.     length = len(array)
  31.     if length % 2 == 1:
  32.         middle_index = (length - 1) // 2
  33.         first_half = array[:middle_index]
  34.         second_half = array[(middle_index + 1) :]
  35.         # find interquatile range
  36.         iqr = median(second_half) - median(first_half)
  37.         return iqr
  38.     else:
  39.         middle_index = length // 2
  40.         first_half = array[:middle_index]
  41.         second_half = array[middle_index:]
  42.         # find interquatile range
  43.         iqr = median(second_half) - median(first_half)
  44.         return iqr
  45.  
  46.  
  47. # reusable mean function
  48. def mean(array):
  49.     return sum(array) / len(array)
  50.  
  51.  
  52. # reusable standard deviation function
  53. def standard_deviation(array):
  54.     # Find the mean (average) of the dataset.
  55.     average = mean(array)
  56.     # For each data point, subtract the mean and then square the result.
  57.     u = []
  58.     for item in array:
  59.         u.append((item - average) ** 2)
  60.     # Add up all of the squared differences.
  61.     sum_of_u = sum(u)
  62.     # Divide the total by the number of data points (N).
  63.     # Take the square root of the result to get the standard deviation.
  64.     return (sum_of_u / len(array)) ** 0.5
  65.  
  66.  
  67. # reusable z-score  function
  68. def z_score(array):
  69.     z_scores = []
  70.  
  71.     # Calculate the mean of the list
  72.     average = mean(array)
  73.  
  74.     # Calculate the standard deviation of the list
  75.     stdev = standard_deviation(array)
  76.     # Calculate the z-scores of each data point in the list
  77.     for number in array:
  78.         z_scores.append((number - average) / stdev)
  79.     return z_scores
  80.  
  81.  
  82. # reusable nth-percentile  function
  83. def nth_percentile(array, percentile):
  84.     # A function that returns nth percentiles of an array
  85.  
  86.     # Step 1: Sort the list of numbers
  87.     array.sort()
  88.  
  89.     # Step 2: Calculate the index of the percentile
  90.     index = (percentile / 100) * (len(array) - 1) + 1
  91.  
  92.     # Step 3: If the index is an integer, the  percentile is the value at that index in the sorted list. If the index is not an integer, round it up to the nearest whole number and interpolate between the two adjacent values in the sorted list to find the value of the percentile.
  93.     if index.is_integer():
  94.         percentile = array[int(index) - 1]
  95.     else:
  96.         lower_index = int(index) - 1
  97.         upper_index = lower_index + 1
  98.         lower_value = array[lower_index]
  99.         upper_value = array[upper_index]
  100.         percentile = lower_value + (index - lower_index) * (upper_value - lower_value)
  101.  
  102.     return percentile
  103.  
  104.  
  105. def get_average_of_user_or_all():
  106.     print(
  107.         """
  108. Calculate mean
  109. Press 1 to calculate mean of all transactions
  110. Press 2 to calculate mean for a user
  111.        """
  112.     )
  113.     option = eval(input())
  114.     # initalize total cost and count variables to be used for calculation
  115.     amounts = []
  116.     if option == 1:
  117.         # loop through all the transactions in the dataset
  118.         for user_id in trans.keys():
  119.             user_trans = trans[user_id]["transactions"].values()
  120.             # looping through individual transaction and adding it to total
  121.             #  and keeping count of transactions
  122.             for transaction_detail in list(user_trans):
  123.                 amounts.append(transaction_detail["amount"])
  124.         print("Mean of all users is", mean(amounts))
  125.  
  126.     elif option == 2:
  127.         user_id = input("Input user ID: ")
  128.         # check if user_id exists
  129.         if user_id in trans.keys():
  130.             user_trans = trans[user_id]["transactions"].values()
  131.             # looping through individual transaction and adding it to total
  132.             #  and keeping count of transactions
  133.             for transaction_detail in list(user_trans):
  134.                 amounts.append(transaction_detail["amount"])
  135.             print("Mean of User", user_id, "is", mean(amounts))
  136.         else:
  137.             return "User ID doesn't exist!"
  138.     else:
  139.         return "User has selected an unavailable option"
  140.  
  141.  
  142. def get_median_of_user_or_all():
  143.     print(
  144.         """
  145. Calculate median
  146. Press 1 to calculate median of all transactions
  147. Press 2 to calculate median for a user
  148.        """
  149.     )
  150.     option = eval(input())
  151.     amounts = []
  152.     if option == 1:
  153.         # loop through all the transactions in the dataset
  154.         for user_id in trans.keys():
  155.             user_trans = trans[user_id]["transactions"].values()
  156.             for transaction_detail in list(user_trans):
  157.                 # looping through individual transaction and append amount to amounts
  158.                 amounts.append(transaction_detail["amount"])
  159.         print("Median of all users is", median(amounts))
  160.  
  161.     elif option == 2:
  162.         user_id = input("Input user ID: ")
  163.  
  164.         if user_id in trans.keys():
  165.             user_trans = trans[user_id]["transactions"].values()
  166.             for transaction_detail in list(user_trans):
  167.                 # looping through individual transaction and append amount to amounts
  168.                 amounts.append(transaction_detail["amount"])
  169.             # sort and count the amounts array
  170.             print("Mean of User", user_id, "is", median(amounts))
  171.  
  172.     else:
  173.         return "User has selected an unavailable option"
  174.  
  175.  
  176. def get_mode_of_user_or_all():
  177.     print(
  178.         """
  179. Calculate Mode
  180. Press 1 to calculate mode of all transactions
  181. Press 2 to calculate mode for a user
  182.        """
  183.     )
  184.     option = eval(input())
  185.     names = []
  186.     freq_dictionary = {}
  187.     if option == 1:
  188.         # loop through all the transactions in the dataset
  189.         for user_id in trans.keys():
  190.             user_trans = trans[user_id]["transactions"].values()
  191.             # looping through individual transaction and append amount to amounts
  192.             for transaction_detail in list(user_trans):
  193.                 names.append(transaction_detail["name"].strip())
  194.  
  195.         #  counting the occurence of names
  196.         for name in names:
  197.             # if name doesn't already exist, initialize and increment it by 1
  198.             # else: just increment it by 1 since it exists
  199.             if name not in freq_dictionary.keys():
  200.                 freq_dictionary[name] = 0
  201.                 freq_dictionary[name] += 1
  202.             else:
  203.                 freq_dictionary[name] += 1
  204.  
  205.         highest_key = ""
  206.         highest_occurence = 0
  207.         for name, occurence in freq_dictionary.items():
  208.             if occurence > highest_occurence:
  209.                 highest_occurence = occurence
  210.                 highest_key = name
  211.  
  212.         print(f"Mode of all user's transactions is {highest_key}")
  213.  
  214.     elif option == 2:
  215.         user_id = input("Input user ID: ")
  216.  
  217.         user_trans = trans[user_id]["transactions"].values()
  218.         for transaction_detail in list(user_trans):
  219.             # looping through individual transaction and append amount to amounts
  220.             names.append(transaction_detail["name"].strip())
  221.  
  222.         #  counting the occurence of names
  223.         for name in names:
  224.             # if name doesn't already exist, initialize and increment it by 1
  225.             # else: just increment it by 1 since it exists
  226.             if name not in freq_dictionary.keys():
  227.                 freq_dictionary[name] = 0
  228.                 freq_dictionary[name] += 1
  229.             else:
  230.                 freq_dictionary[name] += 1
  231.  
  232.         highest_key = ""
  233.         highest_occurence = 0
  234.         for name, occurence in freq_dictionary.items():
  235.             if occurence > highest_occurence:
  236.                 highest_occurence = occurence
  237.                 highest_key = name
  238.         print(f"Mode of User {user_id}'s transactions is {highest_key}")
  239.  
  240.     else:
  241.         print("User has selected an unavailable option")
  242.  
  243.  
  244. def get_user_location_centroid():
  245.     print("computes the location centroid of any specific user’s transaction.")
  246.  
  247.     user_id = input("Input user ID: ")
  248.  
  249.     list_of_x = []
  250.     list_of_y = []
  251.  
  252.     user_trans = trans[user_id]["transactions"].values()
  253.     for transaction_detail in list(user_trans):
  254.         # looping through individual transaction appending thier x and y coordinates
  255.         list_of_x.append(transaction_detail["x"])
  256.         list_of_y.append(transaction_detail["y"])
  257.  
  258.     print(
  259.         f"Location centroid of user {user_id} is Y: {mean(list_of_x)}, X: {mean(list_of_y)}"
  260.     )
  261.  
  262.  
  263. def get_interquartile_of_user_or_all():
  264.     # A function that returns the interquartile range of any user’s transactions and of all users.
  265.     print(
  266.         """
  267. Calculate interquartile
  268. Press 1 to calculate interquartile of all transactions
  269. Press 2 to calculate interquartile for a user
  270.        """
  271.     )
  272.  
  273.     option = eval(input())
  274.     amounts = []
  275.     if option == 1:
  276.         # loop through all the transactions in the dataset
  277.         for user_id in trans.keys():
  278.             user_trans = trans[user_id]["transactions"].values()
  279.             for transaction_detail in list(user_trans):
  280.                 # looping through individual transaction and append amount to amounts
  281.                 amounts.append(transaction_detail["amount"])
  282.         print(
  283.             f"The interquartile range of all user's transactions is {interquatile_range(amounts)}"
  284.         )
  285.  
  286.     elif option == 2:
  287.         user_id = input("Input user ID: ")
  288.  
  289.         if user_id in trans.keys():
  290.             user_trans = trans[user_id]["transactions"].values()
  291.             for transaction_detail in list(user_trans):
  292.                 # looping through individual transaction and append amount to amounts
  293.                 amounts.append(transaction_detail["amount"])
  294.         print(
  295.             f"The interquartile range of User {user_id}'s transactions is {interquatile_range(amounts)}"
  296.         )
  297.  
  298.     else:
  299.         print("User has selected an unavailable option")
  300.  
  301.  
  302. def get_standard_deviation_of_user_or_all():
  303.     # A function that computes the standard deviation of any specific user’s transaction.
  304.  
  305.     print(
  306.         """
  307. Calculate standard deviation
  308. Press 1 to calculate standard deviation  of all transactions
  309. Press 2 to calculate standard deviation  for a user
  310.        """
  311.     )
  312.  
  313.     option = eval(input())
  314.     # initalize total cost and count variables to be used for calculation
  315.     amounts = []
  316.     if option == 1:
  317.         # loop through all the transactions in the dataset
  318.         for user_id in trans.keys():
  319.             user_trans = trans[user_id]["transactions"].values()
  320.             # looping through individual transaction and adding it to total
  321.             #  and keeping count of transactions
  322.             for transaction_detail in list(user_trans):
  323.                 amounts.append(transaction_detail["amount"])
  324.         print(
  325.             f"The standard deviation of all user's transactions is {standard_deviation(amounts)}"
  326.         )
  327.  
  328.     elif option == 2:
  329.         user_id = input("Input user ID: ")
  330.         # check if user_id exists
  331.         if user_id in trans.keys():
  332.             user_trans = trans[user_id]["transactions"].values()
  333.             # looping through individual transaction and adding it to total
  334.             #  and keeping count of transactions
  335.             for transaction_detail in list(user_trans):
  336.                 amounts.append(transaction_detail["amount"])
  337.  
  338.             print(
  339.                 f"The interquartile range of User {user_id}'s transactions is {standard_deviation(amounts)}"
  340.             )
  341.         else:
  342.             print("User ID doesn't exist!")
  343.     else:
  344.         print("User has selected an unavailable option")
  345.  
  346.  
  347. def get_z_score_of_user_or_all():
  348.     # A function that computes the z_score of any specific user’s transaction.
  349.  
  350.     print(
  351.         """
  352. Calculate z_score
  353. Press 1 to calculate z_score  of all transactions
  354. Press 2 to calculate z_score  for a user
  355.        """
  356.     )
  357.  
  358.     option = eval(input())
  359.     # initalize total cost and count variables to be used for calculation
  360.     amounts = []
  361.     if option == 1:
  362.         # loop through all the transactions in the dataset
  363.         for user_id in trans.keys():
  364.             user_trans = trans[user_id]["transactions"].values()
  365.             # looping through individual transaction and adding it to total
  366.             #  and keeping count of transactions
  367.             for transaction_detail in list(user_trans):
  368.                 amounts.append(transaction_detail["amount"])
  369.  
  370.         scores = z_score(amounts)
  371.         for index in range(len(scores)):
  372.             print("The z-score of ", amounts[index], " is ", scores[index])
  373.  
  374.     elif option == 2:
  375.         user_id = input("Input user ID: ")
  376.         # check if user_id exists
  377.         if user_id in trans.keys():
  378.             user_trans = trans[user_id]["transactions"].values()
  379.             # looping through individual transaction and adding it to total
  380.             #  and keeping count of transactions
  381.             for transaction_detail in list(user_trans):
  382.                 amounts.append(transaction_detail["amount"])
  383.  
  384.         scores = z_score(amounts)
  385.         for index in range(len(scores)):
  386.             print("The z-score of ", amounts[index], " is ", scores[index])
  387.         else:
  388.             return "User ID doesn't exist!"
  389.     else:
  390.         return "User has selected an unavailable option"
  391.  
  392.  
  393. def get_nth_percentile_of_user_or_all():
  394.     print(
  395.         """
  396. Calculate mean
  397. Press 1 to calculate mean of all transactions
  398. Press 2 to calculate mean for a user
  399.        """
  400.     )
  401.     option = eval(input())
  402.     # initalize total cost and count variables to be used for calculation
  403.     amounts = []
  404.     if option == 1:
  405.         percentile = eval(input("Input number of percentile: "))
  406.         # loop through all the transactions in the dataset
  407.         for user_id in trans.keys():
  408.             user_trans = trans[user_id]["transactions"].values()
  409.             # looping through individual transaction and adding it to total
  410.             #  and keeping count of transactions
  411.             for transaction_detail in list(user_trans):
  412.                 amounts.append(transaction_detail["amount"])
  413.  
  414.         print(
  415.             "The ",
  416.             percentile,
  417.             " for the list of transactions of all users is ",
  418.             nth_percentile(amounts, percentile),
  419.         )
  420.  
  421.     elif option == 2:
  422.         user_id = input("Input user ID: ")
  423.         # check if user_id exists
  424.         if user_id in trans.keys():
  425.             percentile = eval(input("Input number of percentile: "))
  426.             user_trans = trans[user_id]["transactions"].values()
  427.             # looping through individual transaction and adding it to total
  428.             #  and keeping count of transactions
  429.             for transaction_detail in list(user_trans):
  430.                 amounts.append(transaction_detail["amount"])
  431.             print(
  432.                 "The",
  433.                 percentile,
  434.                 "th for the list of  User id",
  435.                 user_id,
  436.                 "transactions of all users is",
  437.                 nth_percentile(amounts, percentile),
  438.             )
  439.         else:
  440.             return "User ID doesn't exist!"
  441.     else:
  442.         return "User has selected an unavailable option"
  443.  
  444.  
  445. def is_fraudulent_transaction():
  446.     print(
  447.         """
  448. Determine if a transaction is fraudulent or not
  449.        """
  450.     )
  451.     transaction_id = input("Input transaction id: ")
  452.     for user_id in trans.keys():
  453.         user_trans = trans[user_id]["transactions"]
  454.         if transaction_id in user_trans.keys():
  455.             transaction = user_trans[transaction_id]
  456.             print(transaction)
  457.             print("Fradulent:" + " " + transaction["is_fraud"])
  458.             # early return to break after finding the transaction id
  459.             return
  460.     print("Transaction ID doesn't exist!")
  461.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement