Advertisement
acclivity

pyCS-Grads-Analysis

Mar 15th, 2023
649
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.17 KB | Software | 0 0
  1. def obtainFile():
  2.  
  3.     # MJK I'm not keen on arbitrary flags to control flow.
  4.     # For me a big feature of functions is the ability to exit at any point when a return condition is found.
  5.     # (I admit, not everyone agrees with this! But for me it makes code shorter and clearer).
  6.     # Here I use break to exit the loop, then we can return
  7.  
  8.     while True:                     # Keep looping until a valid file is given
  9.         inFile = input('Please enter the name of the data file: ')
  10.  
  11.         # MJK I think it is worth adding a check that the file entered is a .csv filw
  12.         if not inFile.lower().endswith('.csv'):
  13.             print("Must be a .csv file, try again ...")
  14.             continue
  15.  
  16.         try:
  17.             newFile = open(inFile, 'r')
  18.             break
  19.  
  20.         except IOError:
  21.             print("Invalid file name, try again ...")
  22.  
  23.     return newFile  # MJK Exit from function when valid file name is input
  24.  
  25.  
  26. def getData():
  27.     yearList = []
  28.     totalList = []
  29.     menList = []
  30.     womenList = []
  31.     inFile = obtainFile()               # MJK Get the file handle of the .csv file
  32.  
  33.     lineReader = inFile.readline()      # MJK Read and skip the header row
  34.  
  35.     # MJK Read data rows of the .csv file
  36.     for lineReader in inFile:
  37.         fields = lineReader.split(',')              # MJK read one row and split into fields
  38.         if len(fields) < 4:
  39.             continue                                # expecting at least 4 columns per row
  40.  
  41.         # get these 4 columns individually, as we are not sure how many columns actually exist in the .csv
  42.         year, total, men, women = fields[0], fields[1], fields[2], fields[3]
  43.  
  44.         # Note: Probably a good idea to use .strip() on each column data
  45.         # We could put these lines within a try: in case of any bad data
  46.         yearList.append(int(year.strip()))
  47.         totalList.append(int(total.strip()))
  48.         menList.append(int(men.strip()))
  49.         womenList.append(int(women.strip()))
  50.  
  51.     inFile.close()
  52.     return yearList, totalList, menList, womenList              # Returning all data (same func for Q1 and Q2)
  53.  
  54.  
  55. # MJK New utility function to get any integer from user input
  56. def getInt(prompt):
  57.     while True:                             # Loop until an integer is entered
  58.         user_in = input(prompt)
  59.         if user_in.isdigit():               # check that the input consists of digits only
  60.             break
  61.         print("Non-numeric input. Try again\n")
  62.  
  63.     return int(user_in)                     # Return integer value when an integer is input
  64.  
  65. # MJK Here is a function to get a year within the required range (range as defined by the .CSV file data itself)
  66. # yearList is the full list of years extracted from the CSV file
  67. # For Question 2, year must be between earliestYear+1, and latestYear, inclusive
  68. # This is better than using a fixed range of 1972 to 2011 inclusive
  69. def getYear(prompt, yearList, question):
  70.     # Find the minimum year, and the maximum year, from the list of years in the CSV file
  71.     minYear = min(yearList)
  72.     if question == 2:                       # for Q2, minimum allowed year is earliest year +1
  73.         minYear += 1
  74.     maxYear = max(yearList)                 # Get the latest year from the CSV
  75.  
  76.     while True:
  77.         y = getInt(prompt)                  # Get a valid integer from user input
  78.         if minYear <= y <=  maxYear:        # Check user year within required range
  79.             break
  80.  
  81.         print("Year out of range. Try again\n")
  82.  
  83.     return y                    # Exit when a valid year is given
  84.  
  85.  
  86. # This function is for Question 1 only
  87. # This returns 2 years, with the 2nd being greater than the first
  88. # yearList is the complete list of years found in the CSV file
  89. def getYears(yearList, question):
  90.     while True:
  91.         year1 = getYear("Enter earlier of two years: ", yearList, question)
  92.         year2 = getYear("Enter later of two years: ", yearList, question)
  93.         if year2 > year1:
  94.             break
  95.         print("2nd year not greater than first. Try again\n")
  96.  
  97.     return year1, year2
  98.  
  99.  
  100. def computePercentChange(yearList, year1, womenList):
  101.  
  102.     # storeFileName = ("womenGradChangeFrom" + str(year1) + ".csv")
  103.     # MJK use f-string formatting is nicer
  104.     storeFileName = f"womenGradChangeFrom{year1}.csv"
  105.     f = open(storeFileName, "w")
  106.  
  107.     chosenYearIndex = yearList.index(year1)  # position of chosen year in list
  108.  
  109.     for i in range(chosenYearIndex, len(yearList)):
  110.         difference = (womenList[i] - womenList[i - 1]) / womenList[i - 1]  #
  111.  
  112.         # MJK NOTE: They have asked for a percentage difference, so multiple ratio by 100
  113.         percentDiff = 100 * difference
  114.  
  115.         # f.write(str((yearList[i])) + ',' + str(percentDiff) + '\n')
  116.         # MJK Again, f-string formatting is nice
  117.         f.write(f"{yearList[i]},{percentDiff}\n")
  118.     f.close()
  119.     print("Your data has been written to the file " + storeFileName)
  120.  
  121.  
  122. def main():
  123.     # MJK: Use the same function for Q1 and Q2 for getting data
  124.     yearList, totalList, menList, womenList = getData()
  125.  
  126.     year1 = getYear("Enter a year: ", yearList, 2)          # Question 2
  127.     computePercentChange(yearList, year1, womenList)
  128.  
  129.  
# Start the interactive workflow as soon as this module is loaded.
main()
  131.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement