Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public Map<String, Map<Sentiment, Double>> calculateSmoothedLogProbs(Map<Path, Sentiment> trainingSet) throws IOException {
- // get the count of all words in all the positive paths and negative paths separately
- double accPos = 0;
- double accNeg = 0;
- for (Path path : trainingSet.keySet()){
- List<String> review = Tokenizer.tokenize(path);
- if (trainingSet.get(path).equals(Sentiment.POSITIVE)) {
- accPos += review.size();
- } else {
- accNeg += review.size();
- }
- }
- // creating the hashmap
- Map <String, Map<Sentiment, Double>> wordProbability = new HashMap<String, Map<Sentiment, Double>>();
- for (Path path : trainingSet.keySet()){
- List<String> review = Tokenizer.tokenize(path);
- for (String string : review) {
- if (wordProbability.containsKey(string)) {
- // if string is already a key, check if there is a hashmap that contains the current Sentiment value
- Map<Sentiment, Double> temp = wordProbability.get(string);
- if (temp.containsKey(trainingSet.get(path))) {
- // if the sentiment key already exist, add on toe the value
- double tempDouble = temp.get(trainingSet.get(path)) + 1;
- temp.replace(trainingSet.get(path), tempDouble);
- } else {
- // else create a new sentiment key with 1 to start
- temp.put(trainingSet.get(path), (double) 1);
- }
- wordProbability.replace(string, temp);
- } else {
- // if the string does not exist, create a new string key with a hashmap of the current sentiment value
- HashMap<Sentiment, Double> anotherTemp = new HashMap<Sentiment, Double>();
- anotherTemp.put(trainingSet.get(path), (double) 1);
- wordProbability.put(string, anotherTemp);
- }
- }
- }
- // after generating the hashmap, checks if both positive and negative are covered or just one side
- for (String string : wordProbability.keySet()){
- Map<Sentiment, Double> yetAnotherTemp = wordProbability.get(string);
- if (yetAnotherTemp.size() == 1){
- for (Sentiment sentiment : yetAnotherTemp.keySet()) {
- // if only one side is represented
- Sentiment otherSentiment = sentiment.equals(Sentiment.POSITIVE) ? Sentiment.NEGATIVE : Sentiment.POSITIVE;
- yetAnotherTemp.replace(sentiment, yetAnotherTemp.get(sentiment) + 1);
- yetAnotherTemp.put(otherSentiment, (double) 1);
- accNeg ++;
- accPos ++;
- }
- wordProbability.replace(string, yetAnotherTemp);
- } else {
- // if both sides are represented
- for (Sentiment sentiment : yetAnotherTemp.keySet()) {
- yetAnotherTemp.replace(sentiment, yetAnotherTemp.get(sentiment) + 1);
- }
- wordProbability.replace(string, yetAnotherTemp);
- accNeg ++;
- accPos ++;
- }
- // dividing each value by its respective accumulator to obtain probability
- for (Sentiment sentiment: wordProbability.get(string).keySet()) {
- double isPosAcc = sentiment.equals(Sentiment.POSITIVE) ? accPos : accNeg;
- double tempPos = wordProbability.get(string).get(sentiment)/ isPosAcc;
- wordProbability.get(string).replace(sentiment,Math.log(tempPos));
- }
- }
- return wordProbability;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement