Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # STATISTICS MODULE: reusable statistics helpers plus interactive (input/print) reports over the transaction dataset
- from dataset import load_users_transactions
- trans = load_users_transactions()  # loaded once at import time; the functions below read it as trans[user_id]["transactions"][transaction_id]
# reusable median function
def median(array):
    """Return the median of a collection of numbers.

    The input is left untouched: the values are copied into a sorted list
    before the middle element(s) are picked.

    Args:
        array: Non-empty iterable of numbers.

    Returns:
        The middle value when the number of items is odd, otherwise the
        average of the two middle values (always a float in that case).

    Raises:
        ValueError: If ``array`` contains no values.
    """
    # sorted() works on a copy, so the caller's list keeps its order
    ordered = sorted(array)
    length = len(ordered)
    if length == 0:
        raise ValueError("median() requires at least one value")

    middle = length // 2
    if length % 2 == 0:
        # even count: average the two values around the centre
        return (ordered[middle - 1] + ordered[middle]) / 2
    return ordered[middle]
# reusable interquartile range function
def interquatile_range(array):
    """Return the interquartile range (Q3 - Q1) of a collection of numbers.

    The sorted values are split into a lower and an upper half; when the
    number of values is odd the middle value belongs to neither half. The
    result is the median of the upper half minus the median of the lower
    half. The input is not modified.

    Args:
        array: Iterable holding at least two numbers.

    Returns:
        The difference between the medians of the upper and lower halves.

    Raises:
        ValueError: If fewer than two values are supplied, because the
            halves would be empty.
    """
    # work on a sorted copy so the caller's list keeps its order
    ordered = sorted(array)
    length = len(ordered)
    if length < 2:
        raise ValueError("interquatile_range() requires at least two values")

    half = length // 2
    lower_half = ordered[:half]
    # for an odd length, `length % 2` skips the middle value itself
    upper_half = ordered[half + length % 2:]
    return median(upper_half) - median(lower_half)
# reusable mean function
def mean(array):
    """Return the arithmetic mean of ``array``: its sum divided by its length."""
    total = sum(array)
    count = len(array)
    return total / count
# reusable standard deviation function
def standard_deviation(array):
    """Return the population standard deviation of ``array``.

    Computed as the square root of the mean squared distance from the
    average, dividing by the number of data points (N).
    """
    centre = mean(array)
    # squared distance of every data point from the average
    squared_gaps = [(value - centre) ** 2 for value in array]
    variance = sum(squared_gaps) / len(array)
    return variance ** 0.5
# reusable z-score function
def z_score(array):
    """Return the z-score of every value in ``array``.

    Each z-score is ``(value - mean) / standard deviation``, using the
    population standard deviation.

    Args:
        array: Sequence of numbers.

    Returns:
        A list of z-scores in the same order as ``array``. An empty input
        yields an empty list. When every value is identical the standard
        deviation is zero, so every z-score is reported as ``0.0`` instead
        of dividing by zero.
    """
    if len(array) == 0:
        return []

    average = mean(array)
    stdev = standard_deviation(array)
    if stdev == 0:
        # no spread at all: every value sits exactly on the mean
        return [0.0 for _ in array]
    return [(number - average) / stdev for number in array]
# reusable nth-percentile function
def nth_percentile(array, percentile):
    """Return the requested percentile of ``array`` using linear interpolation.

    The values are sorted (on a copy, the input is not modified) and the
    percentile is mapped to a fractional position
    ``percentile / 100 * (len - 1)`` in the sorted list. When that position
    is a whole number the value at that position is returned unchanged;
    otherwise the result is interpolated between the two neighbouring values.

    Args:
        array: Non-empty iterable of numbers.
        percentile: Percentile to compute, between 0 and 100 inclusive.

    Returns:
        The percentile value.

    Raises:
        ValueError: If ``array`` is empty or ``percentile`` is outside 0-100.
    """
    ordered = sorted(array)
    if not ordered:
        raise ValueError("nth_percentile() requires at least one value")
    if not 0 <= percentile <= 100:
        raise ValueError("percentile must be between 0 and 100")

    # zero-based fractional position of the percentile in the sorted list
    rank = (percentile / 100) * (len(ordered) - 1)
    lower_index = int(rank)
    # interpolation weight is only the fractional part of the position
    fraction = rank - lower_index
    if fraction == 0:
        return ordered[lower_index]

    lower_value = ordered[lower_index]
    upper_value = ordered[lower_index + 1]
    return lower_value + fraction * (upper_value - lower_value)
def get_average_of_user_or_all():
    """Interactively print the mean transaction amount.

    Prints a menu, reads the choice from standard input and then prints the
    mean of the "amount" field either over every transaction in ``trans``
    (choice 1) or over the transactions of one user ID typed by the user
    (choice 2).

    Returns:
        None after printing a result, or an error message string when the
        user ID is unknown or the menu choice is not 1 or 2.
    """
    import ast  # local import: only needed to parse the menu choice

    print(
        """
    Calculate mean
    Press 1 to calculate mean of all transactions
    Press 2 to calculate mean for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        # amounts of every transaction of every user
        amounts = [
            detail["amount"]
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
        print("Mean of all users is", mean(amounts))
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            return "User ID doesn't exist!"
        amounts = [
            detail["amount"] for detail in trans[user_id]["transactions"].values()
        ]
        print("Mean of User", user_id, "is", mean(amounts))
    else:
        return "User has selected an unavailable option"
def get_median_of_user_or_all():
    """Interactively print the median transaction amount.

    Prints a menu, reads the choice from standard input and then prints the
    median of the "amount" field either over every transaction in ``trans``
    (choice 1) or over the transactions of one user ID typed by the user
    (choice 2).

    Returns:
        None after printing a result, or an error message string when the
        user ID is unknown or the menu choice is not 1 or 2.
    """
    import ast  # local import: only needed to parse the menu choice

    print(
        """
    Calculate median
    Press 1 to calculate median of all transactions
    Press 2 to calculate median for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        # amounts of every transaction of every user
        amounts = [
            detail["amount"]
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
        print("Median of all users is", median(amounts))
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            # previously an unknown ID was ignored without any feedback
            return "User ID doesn't exist!"
        amounts = [
            detail["amount"] for detail in trans[user_id]["transactions"].values()
        ]
        # label corrected: this value is a median, not a mean
        print("Median of User", user_id, "is", median(amounts))
    else:
        return "User has selected an unavailable option"
def _most_frequent_transaction_name(transactions):
    """Return the most frequent stripped "name" among ``transactions``.

    Ties are resolved in favour of the name encountered first; an empty
    input yields an empty string.
    """
    from collections import Counter  # local import keeps the block self-contained

    counts = Counter(detail["name"].strip() for detail in transactions)
    top = counts.most_common(1)
    return top[0][0] if top else ""


def get_mode_of_user_or_all():
    """Interactively print the most frequent transaction name (the mode).

    Prints a menu, reads the choice from standard input and then prints the
    transaction "name" that occurs most often, either across every user in
    ``trans`` (choice 1) or for one user ID typed by the user (choice 2).
    Names are compared after stripping surrounding whitespace. Unknown user
    IDs and invalid menu choices are reported with a printed message.
    """
    import ast  # local import: only needed to parse the menu choice

    print(
        """
    Calculate Mode
    Press 1 to calculate mode of all transactions
    Press 2 to calculate mode for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        # every transaction of every user
        all_transactions = [
            detail
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
        highest_key = _most_frequent_transaction_name(all_transactions)
        print(f"Mode of all user's transactions is {highest_key}")
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            # previously an unknown ID crashed with a KeyError
            print("User ID doesn't exist!")
            return
        highest_key = _most_frequent_transaction_name(
            trans[user_id]["transactions"].values()
        )
        print(f"Mode of User {user_id}'s transactions is {highest_key}")
    else:
        print("User has selected an unavailable option")
def get_user_location_centroid():
    """Interactively print the location centroid of one user's transactions.

    Reads a user ID from standard input and prints the mean of the "x" and
    the mean of the "y" fields of that user's transactions. An unknown user
    ID is reported with a printed message.
    """
    print("computes the location centroid of any specific user’s transaction.")
    user_id = input("Input user ID: ")
    if user_id not in trans:
        # previously an unknown ID crashed with a KeyError
        print("User ID doesn't exist!")
        return

    user_trans = list(trans[user_id]["transactions"].values())
    list_of_x = [detail["x"] for detail in user_trans]
    list_of_y = [detail["y"] for detail in user_trans]
    # labels corrected: X now reports the mean of x, Y the mean of y
    print(
        f"Location centroid of user {user_id} is X: {mean(list_of_x)}, Y: {mean(list_of_y)}"
    )
def get_interquartile_of_user_or_all():
    """Interactively print the interquartile range of transaction amounts.

    Prints a menu, reads the choice from standard input and then prints the
    interquartile range of the "amount" field either over every transaction
    in ``trans`` (choice 1) or over the transactions of one user ID typed by
    the user (choice 2). Unknown user IDs and invalid menu choices are
    reported with a printed message.
    """
    import ast  # local import: only needed to parse the menu choice

    print(
        """
    Calculate interquartile
    Press 1 to calculate interquartile of all transactions
    Press 2 to calculate interquartile for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        # amounts of every transaction of every user
        amounts = [
            detail["amount"]
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
        print(
            f"The interquartile range of all user's transactions is {interquatile_range(amounts)}"
        )
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            # previously an unknown ID was ignored without any feedback
            print("User ID doesn't exist!")
            return
        amounts = [
            detail["amount"] for detail in trans[user_id]["transactions"].values()
        ]
        print(
            f"The interquartile range of User {user_id}'s transactions is {interquatile_range(amounts)}"
        )
    else:
        print("User has selected an unavailable option")
def get_standard_deviation_of_user_or_all():
    """Interactively print the standard deviation of transaction amounts.

    Prints a menu, reads the choice from standard input and then prints the
    standard deviation of the "amount" field either over every transaction
    in ``trans`` (choice 1) or over the transactions of one user ID typed by
    the user (choice 2). Unknown user IDs and invalid menu choices are
    reported with a printed message.
    """
    import ast  # local import: only needed to parse the menu choice

    print(
        """
    Calculate standard deviation
    Press 1 to calculate standard deviation of all transactions
    Press 2 to calculate standard deviation for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        # amounts of every transaction of every user
        amounts = [
            detail["amount"]
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
        print(
            f"The standard deviation of all user's transactions is {standard_deviation(amounts)}"
        )
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            print("User ID doesn't exist!")
            return
        amounts = [
            detail["amount"] for detail in trans[user_id]["transactions"].values()
        ]
        # label corrected: this value is a standard deviation, not an
        # interquartile range
        print(
            f"The standard deviation of User {user_id}'s transactions is {standard_deviation(amounts)}"
        )
    else:
        print("User has selected an unavailable option")
def get_z_score_of_user_or_all():
    """Interactively print the z-score of each transaction amount.

    Prints a menu, reads the choice from standard input and then prints one
    line per transaction with its "amount" and the z-score of that amount,
    either relative to every transaction in ``trans`` (choice 1) or relative
    to the transactions of one user ID typed by the user (choice 2).

    Returns:
        None after printing the scores, or an error message string when the
        user ID is unknown or the menu choice is not 1 or 2.
    """
    import ast  # local import: only needed to parse the menu choice

    print(
        """
    Calculate z_score
    Press 1 to calculate z_score of all transactions
    Press 2 to calculate z_score for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        # amounts of every transaction of every user
        amounts = [
            detail["amount"]
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            return "User ID doesn't exist!"
        amounts = [
            detail["amount"] for detail in trans[user_id]["transactions"].values()
        ]
    else:
        return "User has selected an unavailable option"

    # z_score() returns its results in the same order as the amounts
    for amount, score in zip(amounts, z_score(amounts)):
        print("The z-score of ", amount, " is ", score)
def get_nth_percentile_of_user_or_all():
    """Interactively print a percentile of transaction amounts.

    Prints a menu, reads the choice from standard input, asks for the
    percentile to compute and then prints that percentile of the "amount"
    field either over every transaction in ``trans`` (choice 1) or over the
    transactions of one user ID typed by the user (choice 2).

    Returns:
        None after printing a result, or an error message string when the
        user ID is unknown, the menu choice is not 1 or 2, or the percentile
        is not a number between 0 and 100.
    """
    import ast  # local import: only needed to parse the typed numbers

    # menu text corrected: it previously advertised "mean"
    print(
        """
    Calculate nth percentile
    Press 1 to calculate nth percentile of all transactions
    Press 2 to calculate nth percentile for a user
    """
    )
    # ast.literal_eval replaces eval(): eval() would execute arbitrary code
    # typed at the prompt, literal_eval only accepts plain literals.
    try:
        option = ast.literal_eval(input())
    except (ValueError, SyntaxError):
        option = None  # unreadable input falls through to the "unavailable" branch

    if option == 1:
        user_id = None
    elif option == 2:
        user_id = input("Input user ID: ")
        if user_id not in trans:
            return "User ID doesn't exist!"
    else:
        return "User has selected an unavailable option"

    try:
        percentile = ast.literal_eval(input("Input number of percentile: "))
    except (ValueError, SyntaxError):
        percentile = None
    # bool is excluded explicitly because it is a subclass of int
    if (
        isinstance(percentile, bool)
        or not isinstance(percentile, (int, float))
        or not 0 <= percentile <= 100
    ):
        return "Percentile must be a number between 0 and 100"

    if user_id is None:
        # amounts of every transaction of every user
        amounts = [
            detail["amount"]
            for user in trans.values()
            for detail in user["transactions"].values()
        ]
        print(
            f"The {percentile}th percentile of all users' transactions is "
            f"{nth_percentile(amounts, percentile)}"
        )
    else:
        amounts = [
            detail["amount"] for detail in trans[user_id]["transactions"].values()
        ]
        print(
            f"The {percentile}th percentile of User {user_id}'s transactions is "
            f"{nth_percentile(amounts, percentile)}"
        )
def is_fraudulent_transaction():
    """Interactively look up a transaction and print its fraud flag.

    Reads a transaction ID from standard input, searches every user's
    transactions in ``trans`` for it and, on the first match, prints the
    transaction record followed by its "is_fraud" value. Prints a
    not-found message when no user has a transaction with that ID.
    """
    print(
        """
    Determine if a transaction is fraudulent or not
    """
    )
    transaction_id = input("Input transaction id: ")
    for user in trans.values():
        user_trans = user["transactions"]
        if transaction_id in user_trans:
            transaction = user_trans[transaction_id]
            print(transaction)
            # f-string formatting works whether "is_fraud" is stored as a
            # string or as another type; concatenation required a string
            print(f"Fraudulent: {transaction['is_fraud']}")
            # stop at the first user that owns this transaction ID
            return
    print("Transaction ID doesn't exist!")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement