Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Why does naive bayes classifier favor datasets with high bias and low variance
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.*;
- /**
- * Created by admin on 11/30/2015.
- */
- /*
- #RB = Running Back
- #OL = Outside Linebacker
- #WR = Wide Receiver
- #TE = Tight End
- #FB = Full Back **************** merged with RB
- #DB = Defensive Back
- #QB = Quarter Back
- #LB = Linebacker
- #DL = Defensive Lineman
- */
/**
 * Computes descriptive statistics (mean, mode, median, mid-range, variance and
 * standard deviation) of age at death, overall and per playing position, from a
 * comma-separated data set.
 *
 * <p>Typical use mirrors {@link #main(String[])}: {@link #readFile()},
 * {@link #buildAgeList()}, {@link #calculateValues()}, then
 * {@link #getVariance(HashMap, HashMap)}.
 *
 * <p>The statistic methods keep their raw {@code List} parameters so that existing
 * callers continue to compile; every element is expected to be an {@code Integer}.
 */
public class DataMining {
    /** Data file read by {@link #readFile()}, relative to the working directory. */
    private static final String DEFAULT_DATA_FILE = "src/DataSet_DEADNFLPLAYERS.txt";

    /** Zero-based index of the column holding the year of death. */
    private static final int DEATH_YEAR_COLUMN = 10;

    /** Every row of the input file split on commas; index 0 is the header row. */
    List<String[]> masterList = new ArrayList<>();

    /** Report name -> list of ages. Insertion-ordered so reports print in a stable order. */
    HashMap<String, List> printingMap = new LinkedHashMap<>();

    /** Report name -> mean age, filled in by {@link #calculateValues()}. */
    HashMap<String, Double> meanRoles = new LinkedHashMap<>();

    // Ages at death, overall and per position / filter.
    List<Integer> allAges = new ArrayList<>();
    List<Integer> fullBackAges = new ArrayList<>();
    List<Integer> outsideLineBackerAges = new ArrayList<>();
    List<Integer> wideReceiverAges = new ArrayList<>();
    List<Integer> tightEndAges = new ArrayList<>();
    List<Integer> defensiveBackAges = new ArrayList<>();
    List<Integer> quarterBackAges = new ArrayList<>();
    List<Integer> lineBackerAges = new ArrayList<>();
    List<Integer> defensiveLinemanAges = new ArrayList<>();
    /** Ages strictly between 50 and 80. */
    List<Integer> cleanedUpAges = new ArrayList<>();
    /** Ages of players born in 1940 or later. */
    List<Integer> modernAges = new ArrayList<>();

    /** Report name -> population variance, filled in by {@link #getVariance}. */
    HashMap<String, Double> varianceResults = new LinkedHashMap<>();
    /** Report name -> population standard deviation, filled in by {@link #getVariance}. */
    HashMap<String, Double> stdDevResults = new LinkedHashMap<>();

    /** Runs the full pipeline against the default data file and prints the results. */
    public static void main(String[] args) {
        DataMining dm = new DataMining();
        dm.readFile();
        dm.buildAgeList();
        dm.calculateValues();
        dm.getVariance(dm.printingMap, dm.meanRoles);
    }

    /** Reads the default data file into {@link #masterList}. */
    public void readFile() {
        readFile(DEFAULT_DATA_FILE);
    }

    /**
     * Reads a comma-separated file into {@link #masterList}, one {@code String[]} per line.
     * An I/O failure is reported on standard error and leaves whatever rows were
     * already read in place.
     *
     * @param filename path of the file to read
     */
    public void readFile(String filename) {
        // try-with-resources guarantees the reader is closed even if a read fails.
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = br.readLine()) != null) {
                masterList.add(line.split(","));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Derives the age at death for every data row in {@link #masterList} and files it
     * under the matching position list, then registers every list in
     * {@link #printingMap}.
     *
     * <p>The death year is taken from column {@value #DEATH_YEAR_COLUMN} and the birth
     * year from the last column; surrounding double quotes are stripped. Rows too
     * short to contain a death year (for example a trailing blank line) are skipped.
     *
     * @throws NumberFormatException if a year column does not hold an integer
     */
    public void buildAgeList() {
        // Row 0 holds the column headers.
        for (int i = 1; i < masterList.size(); i++) {
            String[] currentLine = masterList.get(i);
            if (currentLine.length <= DEATH_YEAR_COLUMN) {
                continue; // blank or truncated row: no death year to read
            }
            int deathYear = parseYear(currentLine[DEATH_YEAR_COLUMN]);
            int birthYear = parseYear(currentLine[currentLine.length - 1]);
            int age = deathYear - birthYear;

            allAges.add(age);
            agesForPosition(Arrays.asList(currentLine)).add(age);
            if (age < 80 && age > 50) {
                cleanedUpAges.add(age);
            }
            if (birthYear >= 1940) {
                modernAges.add(age);
            }
        }

        printingMap.put("ALL AGES", allAges);
        printingMap.put("FULL BACK AGES", fullBackAges);
        printingMap.put("OUTSIDE LINE BACKER AGES", outsideLineBackerAges);
        printingMap.put("WIDE RECEIVER AGES", wideReceiverAges);
        // Previously collected but never reported.
        printingMap.put("TIGHT END AGES", tightEndAges);
        printingMap.put("DEFENSIVE BACK AGES", defensiveBackAges);
        printingMap.put("QUARTER BACK AGES", quarterBackAges);
        printingMap.put("LINE BACKER AGES", lineBackerAges);
        printingMap.put("DEFENSIVE LINEMAN AGES", defensiveLinemanAges);
        printingMap.put("CLEANED UP AGES", cleanedUpAges);
        printingMap.put("MODERN AGES", modernAges);
    }

    /** Parses a year column, ignoring the double quotes the data file wraps around values. */
    private static int parseYear(String field) {
        return Integer.parseInt(field.replace("\"", "").trim());
    }

    /**
     * Picks the age list for a row based on the first position tag found among its
     * columns. Rows carrying none of the known tags are treated as defensive linemen.
     */
    private List<Integer> agesForPosition(List<String> columns) {
        if (columns.contains("\"RB/FB\"")) {
            return fullBackAges;
        }
        if (columns.contains("\"OL\"")) {
            return outsideLineBackerAges;
        }
        if (columns.contains("\"WR\"")) {
            return wideReceiverAges;
        }
        if (columns.contains("\"TE\"")) {
            return tightEndAges;
        }
        if (columns.contains("\"DB\"")) {
            return defensiveBackAges;
        }
        if (columns.contains("\"QB\"")) {
            return quarterBackAges;
        }
        if (columns.contains("\"LB\"")) {
            return lineBackerAges;
        }
        return defensiveLinemanAges; // DL
    }

    /**
     * Prints mean, mode, median and mid-range for every list in {@link #printingMap}
     * and records each mean in {@link #meanRoles}. Each non-empty list is sorted in
     * place. Empty lists are reported as having no data and are otherwise skipped.
     */
    @SuppressWarnings("unchecked") // raw List kept for compatibility; elements are Integers
    public void calculateValues() {
        for (Map.Entry<String, List> entry : printingMap.entrySet()) {
            String key = entry.getKey();
            List value = entry.getValue();
            if (value.isEmpty()) {
                // No statistic is defined for an empty sample.
                System.out.println("NO DATA FOR " + key);
                System.out.println("---------------------");
                continue;
            }
            Collections.sort(value);
            System.out.println("MEAN OF " + key);
            double mean = mean(value);
            System.out.println(mean);
            System.out.println("MODE OF " + key);
            System.out.println(mode(value));
            System.out.println("MEDIAN OF " + key);
            System.out.println(median(value));
            System.out.println("MIDRANGE OF " + key);
            System.out.println(midRange(value));
            System.out.println("---------------------");
            // Remember the mean under the list's name for the variance pass.
            meanRoles.put(key, mean);
        }
    }

    /**
     * Returns the arithmetic mean of the values.
     *
     * @param value list of {@code Integer} values
     * @return the sum divided by the number of values, or {@code NaN} for an empty list
     */
    public double mean(List value) {
        int[] ages = toIntArray(value);
        if (ages.length == 0) {
            return Double.NaN;
        }
        long total = 0;
        for (int age : ages) {
            total += age;
        }
        // Floating-point division by n (not integer division by n - 1).
        return (double) total / ages.length;
    }

    /**
     * Returns the most frequent value. When several values tie, the one that occurs
     * first in the list wins.
     *
     * @param value list of {@code Integer} values
     * @return the mode, or {@code NaN} for an empty list
     */
    public double mode(List value) {
        int[] ages = toIntArray(value);
        if (ages.length == 0) {
            return Double.NaN;
        }
        Map<Integer, Integer> counts = new HashMap<>();
        for (int age : ages) {
            Integer seen = counts.get(age);
            counts.put(age, seen == null ? 1 : seen + 1);
        }
        int mode = ages[0];
        int mostPopular = 0;
        // Walk in list order so ties resolve to the earliest value.
        for (int age : ages) {
            int popularity = counts.get(age);
            if (popularity > mostPopular) {
                mostPopular = popularity;
                mode = age;
            }
        }
        return mode;
    }

    /**
     * Returns the median. For an even number of values this is the upper of the two
     * middle values. The input list is not modified and need not be sorted.
     *
     * @param value list of {@code Integer} values
     * @return the element at index {@code size / 2} of the sorted values
     * @throws IndexOutOfBoundsException if the list is empty
     */
    public int median(List value) {
        int[] sorted = toIntArray(value);
        if (sorted.length == 0) {
            throw new IndexOutOfBoundsException("median of an empty list is undefined");
        }
        Arrays.sort(sorted);
        return sorted[sorted.length / 2];
    }

    /**
     * Returns the mid-range, the average of the smallest and largest value. The list
     * need not be sorted.
     *
     * @param value list of {@code Integer} values
     * @return {@code (min + max) / 2}, or {@code NaN} for an empty list
     */
    public double midRange(List value) {
        int[] ages = toIntArray(value);
        if (ages.length == 0) {
            return Double.NaN;
        }
        return ((double) minOf(ages) + maxOf(ages)) / 2;
    }

    /**
     * Prints the range, population variance and population standard deviation of
     * every list, and records them in {@link #varianceResults} and
     * {@link #stdDevResults}. Empty lists are reported as having no data and skipped.
     *
     * @param ageList  report name -> list of {@code Integer} ages
     * @param meanList report name -> precomputed mean; a missing entry is computed
     *                 on the fly
     */
    public void getVariance(HashMap<String, List> ageList, HashMap<String, Double> meanList) {
        for (Map.Entry<String, List> entry : ageList.entrySet()) {
            String key = entry.getKey();
            int[] ages = toIntArray(entry.getValue());
            if (ages.length == 0) {
                System.out.println(key + " : no data");
                continue;
            }
            double varRange = (double) maxOf(ages) - minOf(ages);
            Double storedMean = meanList.get(key);
            double currentMean = storedMean != null ? storedMean : mean(entry.getValue());

            double totalSum = 0;
            for (int age : ages) {
                double deviation = age - currentMean;
                // Square the deviation; '^' in Java is bitwise XOR, not exponentiation.
                totalSum += deviation * deviation;
            }
            double variance = totalSum / ages.length;
            System.out.println(key + " : " + varRange + " : " + variance);
            varianceResults.put(key, variance);
            double standardDeviation = Math.sqrt(variance);
            System.out.println(standardDeviation);
            stdDevResults.put(key, standardDeviation);
        }
    }

    /** Copies a raw list of {@code Integer}s into a primitive array, preserving order. */
    private static int[] toIntArray(List values) {
        int[] out = new int[values.size()];
        int i = 0;
        for (Object element : values) {
            out[i++] = (Integer) element;
        }
        return out;
    }

    /** Smallest element of a non-empty array. */
    private static int minOf(int[] values) {
        int min = values[0];
        for (int v : values) {
            if (v < min) {
                min = v;
            }
        }
        return min;
    }

    /** Largest element of a non-empty array. */
    private static int maxOf(int[] values) {
        int max = values[0];
        for (int v : values) {
            if (v > max) {
                max = v;
            }
        }
        return max;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement