KillianMills

DataMining.java with Player class

Dec 4th, 2015
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 10.05 KB | None | 0 0
  1. // Why does a naive Bayes classifier favor datasets with high bias and low variance?
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.FileReader;
  5. import java.io.IOException;
  6. import java.util.*;
  7.  
  8. /**
  9.  * Created by admin on 11/30/2015.
  10.  */
  11. /*
  12. #RB = Running Back
  13. #OL = Outside Linebacker
  14. #WR = Wide Receiver
  15. #TE = Tight End
  16. #FB = Full Back **************** merged with RB
  17. #DB = Defensive Back
  18. #QB = Quarter Back
  19. #LB = Linebacker
  20. #DL = Defensive Lineman
  21. */
  22. class Player{
  23.     int age;
  24.     int weight;
  25.     int height;
  26.     String position;
  27.     String birthLocation;
  28.  
  29.     Player(int age, int weight, int height, String position, String birthLocation) {
  30.         this.age =age;
  31.         this.weight =weight;
  32.         this.height =height;
  33.         this.position =position;
  34.         this.birthLocation =birthLocation;
  35.     }
  36. }
  37.  
  38. public class DataMining {
  39.  
  40.     //This will hold all of the data, each index will hold a row
  41.     List<String[]> masterList = new ArrayList<>();
  42.  
  43.     //This holds the List names and the list
  44.     HashMap< String, List> printingMap = new HashMap<>();
  45.  
  46.     //This holds the List names and the list
  47.     HashMap< String, Double> meanRoles = new HashMap<>();
  48.  
  49.  
  50.     List<Player> playerHolder = new ArrayList<>();
  51.  
  52.     //Lists of ages per section
  53.     List<Integer> allAges = new ArrayList<>();
  54.     List<Integer> fullBackAges = new ArrayList<>();
  55.     List<Integer> outsideLineBackerAges = new ArrayList<>();
  56.     List<Integer> wideReceiverAges = new ArrayList<>();
  57.     List<Integer> tightEndAges = new ArrayList<>();
  58.     List<Integer> defensiveBackAges = new ArrayList<>();
  59.     List<Integer> quarterBackAges = new ArrayList<>();
  60.     List<Integer> lineBackerAges = new ArrayList<>();
  61.     List<Integer> defensiveLinemanAges = new ArrayList<>();
  62.     List<Integer> cleanedUpAges = new ArrayList<>();
  63.     List<Integer> modernAges = new ArrayList<>();
  64.  
  65.     //Variables for var and std dev
  66.     HashMap<String,Double> varianceResults = new HashMap<>();
  67.     HashMap<String,Double> stdDevResults = new HashMap<>();
  68.  
  69.     public static void main(String args[]){
  70.         DataMining dm = new DataMining();
  71.         dm.readFile();
  72.         dm.buildAgeList();
  73.         dm.calculateValues();
  74.  
  75.         dm.getVariance(dm.printingMap, dm.meanRoles);
  76.     }
  77.  
  78.     public void readFile(){
  79.         String filename= "src/DataSet_DEADNFLPLAYERS.txt";
  80.         //String filename= "src/tester.txt";
  81.  
  82.         try {
  83.             //read in the .txt file
  84.             BufferedReader br = new BufferedReader(new FileReader(filename));
  85.             String line= null;
  86.             String[] lineArray = {};
  87.  
  88.             //need to change this, puts it all into index 0 at the moment
  89.             while ((line = br.readLine()) != null) {
  90.                 // process the line.
  91.                 lineArray = line.split(",");
  92.                 masterList.add(lineArray);
  93.             }
  94.  
  95.         }
  96.         catch(IOException e){
  97.  
  98.             e.printStackTrace();
  99.         }
  100.     }
  101.  
  102.     public void buildAgeList(){
  103.         int deathYear;
  104.         int birthYear;
  105.         int age; // deathYear - birthYear
  106.         int weight;
  107.         int height;
  108.         String heightHolder;
  109.         String position;
  110.         String birthLocation;
  111.  
  112.         //fill lists by using .contains RB, OL, WR etc...
  113.         for(int i=1; i< masterList.size(); i++ ){ //starts at 1 to avoid headers
  114.  
  115.             String[] currentLine = masterList.get(i);
  116.  
  117.             deathYear = Integer.parseInt(currentLine[10].replace("\"", "")); //10th element
  118.             birthYear = Integer.parseInt(currentLine[masterList.get(i).length-1].replace("\"", "")); //last element
  119.             age = deathYear - birthYear; // determines the age at death
  120.             allAges.add(age);
  121.  
  122.             if(currentLine[16].length() == 5 && currentLine[17].length() == 5 ) { // "6'1" "188" 5 charactes each
  123.                 heightHolder = currentLine[16].replace("\"", "");
  124.                 weight = Integer.parseInt(currentLine[17].replace("\"", ""));
  125.                 position = currentLine[8].replace("\"", "");
  126.                 birthLocation = currentLine[masterList.get(i).length - 2].replace("\"", "");
  127.  
  128.                 //System.out.println(heightHolder.charAt(0));
  129.                 //System.out.println(heightHolder.charAt(2));
  130.                 height = Character.getNumericValue(heightHolder.charAt(0)) * 30;
  131.                 System.out.println(height);
  132.                 height = height + (Character.getNumericValue(heightHolder.charAt(2)) * 2);
  133.                 System.out.println(height);
  134.                 Player temp = new Player(age, weight, height, position, birthLocation );
  135.                 playerHolder.add(temp);
  136.             }
  137.  
  138.  
  139.             List<String> tempArray = Arrays.asList(currentLine);
  140.  
  141.             if(tempArray.contains("\"RB/FB\"")){
  142.                 fullBackAges.add(age);
  143.             }
  144.             else if(tempArray.contains("\"OL\"")){
  145.                 outsideLineBackerAges.add(age);
  146.             }
  147.             else if(tempArray.contains("\"WR\"")){
  148.                 wideReceiverAges.add(age);
  149.             }
  150.             else if(tempArray.contains("\"TE\"")){
  151.                 tightEndAges.add(age);
  152.             }
  153.             else if(tempArray.contains("\"DB\"")){
  154.                 defensiveBackAges.add(age);
  155.             }
  156.             else if(tempArray.contains("\"QB\"")){
  157.                 quarterBackAges.add(age);
  158.             }
  159.             else if(tempArray.contains("\"LB\"")){
  160.                 lineBackerAges.add(age);
  161.             }
  162.             else{ // DL
  163.                 defensiveLinemanAges.add(age);
  164.             }
  165.  
  166.             if(age < 80 && age > 50){
  167.                 cleanedUpAges.add(age);
  168.             }
  169.  
  170.             if(birthYear >= 1940){
  171.                 modernAges.add(age);
  172.             }
  173.         }
  174.  
  175.         printingMap.put("ALL AGES", allAges);
  176.         printingMap.put("FULL BACK AGES", fullBackAges);
  177.         printingMap.put("OUTSIDE LINE BACKER AGES", outsideLineBackerAges);
  178.         printingMap.put("WIDE RECEIVER AGES", wideReceiverAges);
  179.         printingMap.put("DEFENSIVE BACK AGES", defensiveBackAges);
  180.         printingMap.put("QUARTER BACK AGES", quarterBackAges);
  181.         printingMap.put("LINE BACKER AGES", lineBackerAges);
  182.         printingMap.put("DEFENSIVE LINEMAN AGES", defensiveLinemanAges);
  183.         printingMap.put("CLEANED UP AGES", cleanedUpAges);
  184.         printingMap.put("MODERN AGES", modernAges);
  185.     }
  186.  
  187.     public void calculateValues(){
  188.         for(Map.Entry<String, List> entry : printingMap.entrySet() ){
  189.             String key = entry.getKey();
  190.             List value = entry.getValue();
  191.  
  192.             Collections.sort(value);
  193.  
  194.             System.out.println("MEAN OF " + key);
  195.             double mean= mean(value);
  196.             System.out.println(mean);
  197.             System.out.println("MODE OF " + key);
  198.             System.out.println(mode(value));
  199.             System.out.println("MEDIAN OF " + key);
  200.             System.out.println(median(value));
  201.             System.out.println("MIDRANGE OF " + key);
  202.             System.out.println(midRange(value));
  203.             System.out.println("---------------------");
  204.  
  205.             //adds a mean for each role, key is name of list
  206.             meanRoles.put(key, mean);
  207.  
  208.         }
  209.     }
  210.  
  211.     public double mean( List value){
  212.  
  213.         int totalLifeSpan = 0;
  214.  
  215.         for(int i = 0; i < value.size(); i++) { // get first value
  216.             totalLifeSpan += (int) value.get(i);
  217.         }
  218.  
  219.         double averageAges = totalLifeSpan / (value.size()-1);
  220.         return averageAges;
  221.     }
  222.  
  223.     public double mode( List value){
  224.  
  225.         int popularity= 0;
  226.         int mostPop= 0;
  227.         int mode= 0;
  228.         int currentValue;
  229.  
  230.         for(int i = 0; i < value.size(); i++) { // get first value
  231.             currentValue = (int)value.get(i);
  232.  
  233.             for(int j=0; j< value.size(); j++){ // compare to other values in the list
  234.                 if(currentValue == (int)value.get(j)){
  235.                     popularity++; // increment the popularity if same
  236.                 }
  237.  
  238.             }
  239.             if(popularity > mostPop) { // if the new value is bigger than the old
  240.                 mostPop = popularity;
  241.                 mode = (int)value.get(i);
  242.             }
  243.             popularity = 0;
  244.         }
  245.         return((double)mode);
  246.     }
  247.  
  248.     public int median( List value){
  249.  
  250.         int lengthOfArray = value.size();
  251.         int middleIndex = lengthOfArray / 2;
  252.         return((int)value.get(middleIndex));
  253.     }
  254.  
  255.     public double midRange( List value){
  256.  
  257.         double max = (int) value.get(0);
  258.         double min = (int) value.get(value.size()-1);
  259.         double midrange = (min + max) / 2 ;
  260.         return(midrange);
  261.     }
  262.  
  263.     public void getVariance(HashMap<String, List> ageList , HashMap<String, Double> meanList){
  264.  
  265.         for(Map.Entry<String, List> entry : ageList.entrySet() ) {
  266.  
  267.             int counter=0;
  268.             double totalSum=0;
  269.  
  270.             String key = entry.getKey(); // name of current list
  271.             List value = entry.getValue(); // current list
  272.  
  273.             double varRange =(int) value.get(value.size()-1)-(int) value.get(0);
  274.  
  275.             double currentMean = meanList.get(key);
  276.  
  277.             for(Object currentAge:value){
  278.                 double sqrValue = (int)currentAge-currentMean;
  279.                 totalSum += (int)sqrValue^2;
  280.                 counter++;
  281.             }
  282.             System.out.println(key + "  :  " + varRange + "  :  " + totalSum / counter);
  283.             varianceResults.put(key,totalSum/counter);
  284.  
  285.             double standardDeviation = Math.sqrt(totalSum/counter);
  286.             System.out.println(standardDeviation);
  287.             stdDevResults.put(key, standardDeviation);
  288.  
  289.         }
  290.  
  291.         System.out.println("----------------------");
  292.         System.out.println(playerHolder);
  293.         System.out.println(playerHolder.size());
  294.         Player temp = playerHolder.get(0);
  295.         System.out.println(temp.age);
  296.         System.out.println(temp.height);
  297.         System.out.println(temp.weight);
  298.         System.out.println(temp.position);
  299.         System.out.println(temp.birthLocation);
  300.     }
  301. }
Advertisement
Add Comment
Please, Sign In to add comment