Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Reads the count files and stores them in hash maps: word -> number of occurrences of that word
- package spamham;
- import java.io.File;
- import java.io.FileNotFoundException;
- import static java.lang.Integer.parseInt;
- import java.util.HashMap;
- import java.util.Scanner;
/**
 * Loads per-word counts for spam and ham from two text files.
 *
 * <p>Each non-blank line of a count file is read as an integer count followed
 * by a word, separated by whitespace (for example {@code 12 hello}). The
 * counts are stored as {@code Float} values keyed by the word.
 */
public class Infomuncher {

    // Declared here but not assigned anywhere in this class.
    int hamTotal;
    int spamTotal;

    File spam;
    File ham;
    HashMap<String, Float> hamOdds;
    HashMap<String, Float> spamOdds;

    /**
     * @param s file holding the spam word counts
     * @param h file holding the ham word counts
     */
    public Infomuncher(File s, File h) {
        spam = s;
        ham = h;
    }

    /**
     * Reads both count files and replaces the spam and ham maps with their
     * contents. The maps are only replaced after both files have been read
     * successfully.
     *
     * @throws FileNotFoundException if either file cannot be opened
     * @throws NumberFormatException if the first field of a line is not an integer
     */
    public void generateOdds() throws FileNotFoundException {
        HashMap<String, Float> spamCounts = readCounts(spam);
        HashMap<String, Float> hamCounts = readCounts(ham);
        spamOdds = spamCounts;
        hamOdds = hamCounts;
    }

    /** Parses one "count word" file into a word -> count map. */
    private static HashMap<String, Float> readCounts(File source) throws FileNotFoundException {
        HashMap<String, Float> counts = new HashMap<String, Float>();
        // try-with-resources so the underlying file handle is always released.
        try (Scanner reader = new Scanner(source)) {
            while (reader.hasNextLine()) {
                String line = reader.nextLine().trim();
                if (line.isEmpty()) {
                    continue; // blank lines carry no data
                }
                // Split on any run of whitespace so repeated spaces or tabs
                // do not produce empty fields.
                String[] fields = line.split("\\s+");
                if (fields.length < 2) {
                    continue; // a count without a word (or vice versa) is unusable
                }
                counts.put(fields[1], (float) Integer.parseInt(fields[0]));
            }
        }
        return counts;
    }

    /** @return the spam word -> count map, or {@code null} before {@link #generateOdds()} has run */
    public HashMap<String, Float> getSpamOdds() {
        return this.spamOdds;
    }

    /** @return the ham word -> count map, or {@code null} before {@link #generateOdds()} has run */
    public HashMap<String, Float> getHamOdds() {
        return this.hamOdds;
    }
}
- package spamham;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.math.BigDecimal;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Scanner;
- public class Spamham {
- public double spamicity(File f, Infomuncher im) throws FileNotFoundException {
- Scanner lukija = new Scanner(f);
- String[] apuri;
- HashMap<String, Float> hamOdds = im.getHamOdds();
- HashMap<String, Float> spamOdds = im.getSpamOdds();
- ArrayList<String> viesti = new ArrayList<>();
- while (lukija.hasNextLine()) {
- apuri = lukija.nextLine().split(" ");
- for(String s : apuri) {
- viesti.add(s);
- }
- }
- float odds = (float) 0.5;
- float apuri1;
- float apuri2;
- for (String s : viesti) {
- if (hamOdds.containsKey(s) && spamOdds.containsKey(s)) {
- apuri2 = hamOdds.get(s) / 290673;
- apuri1 = spamOdds.get(s) / 75268;
- } else if (hamOdds.containsKey(s)) {
- apuri2 = hamOdds.get(s) / 290673;
- apuri1 = (float) 0.00001;
- } else if (spamOdds.containsKey(s)) {
- apuri1 = spamOdds.get(s) / 75268;
- apuri2 = (float) 0.000003;
- } else {
- apuri1 = (float) 0.00001;
- apuri2 = (float) 0.000003;
- }
- odds = (float) (odds + Math.log(apuri1 / apuri2));
- System.out.println(odds);
- }
- return odds;
- }
- public double logOdds(double x, double y) {
- return Math.log(x / y);
- }
- }
- package spamham;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.IOException;
- import java.math.BigDecimal;
- public class main {
- public static void main(String[] args) throws FileNotFoundException, IOException {
- File spam = new File("spamcount.txt");
- File ham = new File("hamcount.txt");
- File msg = new File("penis.txt");
- Spamham sh = new Spamham();
- Infomuncher jyrki = new Infomuncher(spam, ham);
- jyrki.generateOdds();
- System.out.println(sh.spamicity(msg, jyrki));
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement