Advertisement
KillianMills

DataMining.java

Dec 3rd, 2015
154
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 8.13 KB | None | 0 0
  1. // Why does naive bayes classifier favor datasets with high bias and low variance
  2.  
  3. import java.io.BufferedReader;
  4. import java.io.FileReader;
  5. import java.io.IOException;
  6. import java.util.*;
  7.  
  8. /**
  9.  * Created by admin on 11/30/2015.
  10.  */
  11. /*
  12. #RB = Running Back
  13. #OL = Outside Linebacker
  14. #WR = Wide Receiver
  15. #TE = Tight End
  16. #FB = Full Back **************** merged with RB
  17. #DB = Defensive Back
  18. #QB = Quarter Back
  19. #LB = Linebacker
  20. #DL = Defensive Lineman
  21. */
  22.  
  23. public class DataMining {
  24.  
  25.     //This will hold all of the data, each index will hold a row
  26.     List<String[]> masterList = new ArrayList<>();
  27.  
  28.     //This holds the List names and the list
  29.     HashMap< String, List> printingMap = new HashMap<>();
  30.  
  31.     //This holds the List names and the list
  32.     HashMap< String, Double> meanRoles = new HashMap<>();
  33.  
  34.     //Lists of ages per section
  35.     List<Integer> allAges = new ArrayList<>();
  36.     List<Integer> fullBackAges = new ArrayList<>();
  37.     List<Integer> outsideLineBackerAges = new ArrayList<>();
  38.     List<Integer> wideReceiverAges = new ArrayList<>();
  39.     List<Integer> tightEndAges = new ArrayList<>();
  40.     List<Integer> defensiveBackAges = new ArrayList<>();
  41.     List<Integer> quarterBackAges = new ArrayList<>();
  42.     List<Integer> lineBackerAges = new ArrayList<>();
  43.     List<Integer> defensiveLinemanAges = new ArrayList<>();
  44.     List<Integer> cleanedUpAges = new ArrayList<>();
  45.     List<Integer> modernAges = new ArrayList<>();
  46.  
  47.     //Variables for var and std dev
  48.     HashMap<String,Double> varianceResults = new HashMap<>();
  49.     HashMap<String,Double> stdDevResults = new HashMap<>();
  50.  
  51.     public static void main(String args[]){
  52.         DataMining dm = new DataMining();
  53.         dm.readFile();
  54.         dm.buildAgeList();
  55.         dm.calculateValues();
  56.  
  57.         dm.getVariance(dm.printingMap, dm.meanRoles);
  58.     }
  59.  
  60.     public void readFile(){
  61.         String filename= "src/DataSet_DEADNFLPLAYERS.txt";
  62.  
  63.         try {
  64.             //read in the .txt file
  65.             BufferedReader br = new BufferedReader(new FileReader(filename));
  66.             String line= null;
  67.             String[] lineArray = {};
  68.  
  69.             //need to change this, puts it all into index 0 at the moment
  70.             while ((line = br.readLine()) != null) {
  71.                 // process the line.
  72.                 lineArray = line.split(",");
  73.                 masterList.add(lineArray);
  74.             }
  75.  
  76.         }
  77.         catch(IOException e){
  78.  
  79.             e.printStackTrace();
  80.         }
  81.     }
  82.  
  83.     public void buildAgeList(){
  84.         int deathYear;
  85.         int birthYear;
  86.         int age; // deathYear - birthYear
  87.  
  88.         //fill lists by using .contains RB, OL, WR etc...
  89.         for(int i=1; i< masterList.size(); i++ ){ //starts at 1 to avoid headers
  90.  
  91.             String[] currentLine = masterList.get(i);
  92.  
  93.             deathYear = Integer.parseInt(currentLine[10].replace("\"", "")); //10th element
  94.             birthYear = Integer.parseInt(currentLine[masterList.get(i).length-1].replace("\"", "")); //last element
  95.             age = deathYear - birthYear; // determines the age at death
  96.             allAges.add(age);
  97.  
  98.             List<String> tempArray = Arrays.asList(currentLine);
  99.  
  100.             if(tempArray.contains("\"RB/FB\"")){
  101.                 fullBackAges.add(age);
  102.             }
  103.             else if(tempArray.contains("\"OL\"")){
  104.                 outsideLineBackerAges.add(age);
  105.             }
  106.             else if(tempArray.contains("\"WR\"")){
  107.                 wideReceiverAges.add(age);
  108.             }
  109.             else if(tempArray.contains("\"TE\"")){
  110.                 tightEndAges.add(age);
  111.             }
  112.             else if(tempArray.contains("\"DB\"")){
  113.                 defensiveBackAges.add(age);
  114.             }
  115.             else if(tempArray.contains("\"QB\"")){
  116.                 quarterBackAges.add(age);
  117.             }
  118.             else if(tempArray.contains("\"LB\"")){
  119.                 lineBackerAges.add(age);
  120.             }
  121.             else{ // DL
  122.                 defensiveLinemanAges.add(age);
  123.             }
  124.  
  125.             if(age < 80 && age > 50){
  126.                 cleanedUpAges.add(age);
  127.             }
  128.  
  129.             if(birthYear >= 1940){
  130.                 modernAges.add(age);
  131.             }
  132.         }
  133.  
  134.         printingMap.put("ALL AGES", allAges);
  135.         printingMap.put("FULL BACK AGES", fullBackAges);
  136.         printingMap.put("OUTSIDE LINE BACKER AGES", outsideLineBackerAges);
  137.         printingMap.put("WIDE RECEIVER AGES", wideReceiverAges);
  138.         printingMap.put("DEFENSIVE BACK AGES", defensiveBackAges);
  139.         printingMap.put("QUARTER BACK AGES", quarterBackAges);
  140.         printingMap.put("LINE BACKER AGES", lineBackerAges);
  141.         printingMap.put("DEFENSIVE LINEMAN AGES", defensiveLinemanAges);
  142.         printingMap.put("CLEANED UP AGES", cleanedUpAges);
  143.         printingMap.put("MODERN AGES", modernAges);
  144.     }
  145.  
  146.     public void calculateValues(){
  147.         for(Map.Entry<String, List> entry : printingMap.entrySet() ){
  148.             String key = entry.getKey();
  149.             List value = entry.getValue();
  150.  
  151.             Collections.sort(value);
  152.  
  153.             System.out.println("MEAN OF " + key);
  154.             double mean= mean(value);
  155.             System.out.println(mean);
  156.             System.out.println("MODE OF " + key);
  157.             System.out.println(mode(value));
  158.             System.out.println("MEDIAN OF " + key);
  159.             System.out.println(median(value));
  160.             System.out.println("MIDRANGE OF " + key);
  161.             System.out.println(midRange(value));
  162.             System.out.println("---------------------");
  163.  
  164.             //adds a mean for each role, key is name of list
  165.             meanRoles.put(key, mean);
  166.  
  167.         }
  168.     }
  169.  
  170.     public double mean( List value){
  171.  
  172.         int totalLifeSpan = 0;
  173.  
  174.         for(int i = 0; i < value.size(); i++) { // get first value
  175.             totalLifeSpan += (int) value.get(i);
  176.         }
  177.  
  178.         double averageAges = totalLifeSpan / (value.size()-1);
  179.         return averageAges;
  180.     }
  181.  
  182.     public double mode( List value){
  183.  
  184.         int popularity= 0;
  185.         int mostPop= 0;
  186.         int mode= 0;
  187.         int currentValue;
  188.  
  189.         for(int i = 0; i < value.size(); i++) { // get first value
  190.             currentValue = (int)value.get(i);
  191.  
  192.             for(int j=0; j< value.size(); j++){ // compare to other values in the list
  193.                 if(currentValue == (int)value.get(j)){
  194.                     popularity++; // increment the popularity if same
  195.                 }
  196.  
  197.             }
  198.             if(popularity > mostPop) { // if the new value is bigger than the old
  199.                 mostPop = popularity;
  200.                 mode = (int)value.get(i);
  201.             }
  202.             popularity = 0;
  203.         }
  204.         return((double)mode);
  205.     }
  206.  
  207.     public int median( List value){
  208.  
  209.         int lengthOfArray = value.size();
  210.         int middleIndex = lengthOfArray / 2;
  211.         return((int)value.get(middleIndex));
  212.     }
  213.  
  214.     public double midRange( List value){
  215.  
  216.         double max = (int) value.get(0);
  217.         double min = (int) value.get(value.size()-1);
  218.         double midrange = (min + max) / 2 ;
  219.         return(midrange);
  220.     }
  221.  
  222.     public void getVariance(HashMap<String, List> ageList , HashMap<String, Double> meanList){
  223.  
  224.         for(Map.Entry<String, List> entry : ageList.entrySet() ) {
  225.  
  226.             int counter=0;
  227.             double totalSum=0;
  228.  
  229.             String key = entry.getKey(); // name of current list
  230.             List value = entry.getValue(); // current list
  231.  
  232.             double varRange =(int) value.get(value.size()-1)-(int) value.get(0);
  233.  
  234.             double currentMean = meanList.get(key);
  235.  
  236.             for(Object currentAge:value){
  237.                 double sqrValue = (int)currentAge-currentMean;
  238.                 totalSum += (int)sqrValue^2;
  239.                 counter++;
  240.             }
  241.             System.out.println(key + "  :  " + varRange + "  :  " + totalSum / counter);
  242.             varianceResults.put(key,totalSum/counter);
  243.  
  244.             double standardDeviation = Math.sqrt(totalSum/counter);
  245.             System.out.println(standardDeviation);
  246.             stdDevResults.put(key, standardDeviation);
  247.  
  248.         }
  249.     }
  250. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement