Advertisement
KillianMills

DataMiningExtended1.py

Oct 18th, 2015
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.39 KB | None | 0 0
  1. #RB = Running Back
  2. #OL = Outside Linebacker
  3. #WR = Wide Receiver
  4. #TE = Tight End
  5. #FB = Full Back (merged with RB in this dataset)
  6. #DB = Defensive Back
  7. #QB = Quarter Back
  8. #LB = Linebacker
  9. #DL = Defensive Lineman
  10.  
  11. def middle(L):
  12.     L = sorted(L)
  13.     n = len(L)
  14.     m = n - 1
  15.     return (L[n/2] + L[m/2]) / 2.0
  16.  
  17. def main():
  18.     masterDict = {} # will only be added to, not reset
  19.     #with open('tester.txt') as f:
  20.     with open('DataSet_DEADNFLPLAYERS.txt') as f:
  21.         for line in f:
  22.             current_line = line.split(',')
  23.             masterDict.update({len(masterDict) : current_line})
  24.  
  25.     #print masterDict
  26.  
  27.     total_lifeSpan = 0 # will hold total lifespan of all players
  28.     listAges = [] # will hold a list of all player's ages
  29.  
  30.     # lists for ages of each role
  31.     fullBackAges = []
  32.     #runningBackAges = [] # full back and runningBack are the same in our dataset
  33.     outsideLineBackerAges = []
  34.     wideReceiverAges = []
  35.     tightEndAges = []
  36.     defensiveBackAges = []
  37.     quarterBackAges = []
  38.     lineBackerAges = []
  39.     defensiveLinemanAges = []
  40.  
  41.     cleanedUpAges = []
  42.     modernAges = []
  43.  
  44.     for key, value in masterDict.items():
  45.         if(key != 0):
  46.             deathYear = int(float(value[10].strip())) #10th element
  47.             birthYear = int(float(value[len(value)-1].strip())) #last element
  48.             age = deathYear - birthYear # determines the age at death
  49.             listAges.append(age) # adds age to a list
  50.             total_lifeSpan = total_lifeSpan + age # the combinded age of all players at death
  51.  
  52.             if "FB" in value[8]:
  53.                 fullBackAges.append(age)
  54.             #elif "RB" in value[8]: # full back and runningBack are the same in our dataset
  55.                 #runningBackAges.append(age)
  56.             elif "OL" in value[8]:
  57.                 outsideLineBackerAges.append(age)
  58.             elif "WR" in value[8]:
  59.                 wideReceiverAges.append(age)
  60.             elif "TE" in value[8]:
  61.                 tightEndAges.append(age)
  62.             elif "DB" in value[8]:
  63.                  defensiveBackAges.append(age)
  64.             elif "QB" in value[8]:
  65.                 quarterBackAges.append(age)
  66.             elif "LB" in value[8]:
  67.                 lineBackerAges.append(age)
  68.             elif "DL" in value[8]:
  69.                 defensiveLinemanAges.append(age)
  70.                
  71.             if((age < 80) and (age > 50)):
  72.                 cleanedUpAges.append(age)
  73.  
  74.             if birthYear >= 1940:
  75.                 modernAges.append(age)
  76.      
  77.                            
  78.                
  79.     #---------------------------FOR ALL ROLES----------------------------------
  80.  
  81.     #mean, average age across all players
  82.     #print "MEAN OF ALL"
  83.     average_age = total_lifeSpan / (len(masterDict) -1)
  84.     #print average_age
  85.     #print "----------"
  86.    
  87.     #mode, the most commonly occuring value
  88.     #print "MODE OF ALL"
  89.     from collections import Counter
  90.     data = Counter(listAges)
  91.     modeData = data.most_common(1)
  92.     newModeData = [x[0] for x in modeData]
  93.     #print data.most_common(1)
  94.     #print "----------"
  95.  
  96.     #median, the least extreme value or the middle value
  97.     #print "MEDIAN OF ALL"
  98.     #print middle(listAges)
  99.     #print "----------"
  100.  
  101.     #mid-range, the mean of the highest and lowest value
  102.     #print "MIDRANGE OF ALL"
  103.     midrange = min(listAges) + max(listAges)
  104.     midrange = midrange / 2
  105.     #print midrange
  106.     #print "----------"
  107.     #print "----------"
  108.     #print "----------"
  109.  
  110.     #print newModeData
  111.     #newMode = (3 * midrange) - (2 * newModeData[0])
  112.     newMode = (3 * middle(listAges)) - ( 2 * average_age)
  113.     print "MEAN OF ALL"
  114.     print newMode
  115.  
  116.     print "MEDIAN OF ALL"
  117.     newMedian =  newModeData[0] + (2 * (average_age - newModeData[0]) / 3)
  118.     print newMedian
  119.  
  120.     print "MEAN - MODE"
  121.     newValue =  average_age - newModeData[0]
  122.     print newValue
  123.  
  124.     #-------------------------FOR THE NINE ROLES-----------------------------
  125.     printingList = []
  126.  
  127.     printingList.append(fullBackAges)
  128.     #printingList.append(runningBackAges)# full back and runningBack are the same in our dataset
  129.     printingList.append(outsideLineBackerAges)
  130.     printingList.append(wideReceiverAges)
  131.     printingList.append(tightEndAges)
  132.     printingList.append(defensiveBackAges)
  133.     printingList.append(quarterBackAges)
  134.     printingList.append(lineBackerAges)
  135.     printingList.append(defensiveLinemanAges)
  136.  
  137.     printingList.append(cleanedUpAges)
  138.     printingList.append(modernAges)
  139.  
  140.     for playerList in printingList:
  141.  
  142.         #mean, average age across all players
  143.         #print ("MEAN OF ")#, playerList)
  144.         average_age = sum(playerList) / (len(playerList) -1)
  145.         #print average_age
  146.         #print "----------"
  147.    
  148.         #mode, the most commonly occuring value
  149.         #print ("MODE OF ")# % playerList)
  150.         from collections import Counter
  151.         data = Counter(playerList)
  152.         #print data.most_common(1)
  153.         #print "----------"
  154.  
  155.         #median, the least extreme value or the middle value
  156.         #print ("MEDIAN OF)# " % playerList)
  157.         #print middle(playerList)
  158.         #print "----------"
  159.  
  160.         #mid-range, the mean of the highest and lowest value
  161.         #print ("MIDRANGE OF ")# % playerList
  162.         midrange = min(playerList) + max(playerList)
  163.         midrange = midrange / 2
  164.         #print midrange
  165.         #print "----------"
  166.         #print "----------"
  167.         #print "----------"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement