Advertisement
Guest User

Untitled

a guest
Jan 20th, 2020
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.80 KB | None | 0 0
  1. public Map<String, Map<Sentiment, Double>> calculateSmoothedLogProbs(Map<Path, Sentiment> trainingSet) throws IOException {
  2. // get the count of all words in all the positive paths and negative paths separately
  3. double accPos = 0;
  4. double accNeg = 0;
  5. for (Path path : trainingSet.keySet()){
  6. List<String> review = Tokenizer.tokenize(path);
  7. if (trainingSet.get(path).equals(Sentiment.POSITIVE)) {
  8. accPos += review.size();
  9. } else {
  10. accNeg += review.size();
  11. }
  12. }
  13. // creating the hashmap
  14. Map <String, Map<Sentiment, Double>> wordProbability = new HashMap<String, Map<Sentiment, Double>>();
  15. for (Path path : trainingSet.keySet()){
  16. List<String> review = Tokenizer.tokenize(path);
  17. for (String string : review) {
  18. if (wordProbability.containsKey(string)) {
  19. // if string is already a key, check if there is a hashmap that contains the current Sentiment value
  20. Map<Sentiment, Double> temp = wordProbability.get(string);
  21. if (temp.containsKey(trainingSet.get(path))) {
  22. // if the sentiment key already exist, add on toe the value
  23. double tempDouble = temp.get(trainingSet.get(path)) + 1;
  24. temp.replace(trainingSet.get(path), tempDouble);
  25. } else {
  26. // else create a new sentiment key with 1 to start
  27. temp.put(trainingSet.get(path), (double) 1);
  28. }
  29. wordProbability.replace(string, temp);
  30. } else {
  31. // if the string does not exist, create a new string key with a hashmap of the current sentiment value
  32. HashMap<Sentiment, Double> anotherTemp = new HashMap<Sentiment, Double>();
  33. anotherTemp.put(trainingSet.get(path), (double) 1);
  34. wordProbability.put(string, anotherTemp);
  35. }
  36. }
  37. }
  38. // after generating the hashmap, checks if both positive and negative are covered or just one side
  39. for (String string : wordProbability.keySet()){
  40. Map<Sentiment, Double> yetAnotherTemp = wordProbability.get(string);
  41. if (yetAnotherTemp.size() == 1){
  42. for (Sentiment sentiment : yetAnotherTemp.keySet()) {
  43. // if only one side is represented
  44. Sentiment otherSentiment = sentiment.equals(Sentiment.POSITIVE) ? Sentiment.NEGATIVE : Sentiment.POSITIVE;
  45. yetAnotherTemp.replace(sentiment, yetAnotherTemp.get(sentiment) + 1);
  46. yetAnotherTemp.put(otherSentiment, (double) 1);
  47. accNeg ++;
  48. accPos ++;
  49. }
  50. wordProbability.replace(string, yetAnotherTemp);
  51. } else {
  52. // if both sides are represented
  53. for (Sentiment sentiment : yetAnotherTemp.keySet()) {
  54. yetAnotherTemp.replace(sentiment, yetAnotherTemp.get(sentiment) + 1);
  55. }
  56. wordProbability.replace(string, yetAnotherTemp);
  57. accNeg ++;
  58. accPos ++;
  59. }
  60. // dividing each value by its respective accumulator to obtain probability
  61. for (Sentiment sentiment: wordProbability.get(string).keySet()) {
  62. double isPosAcc = sentiment.equals(Sentiment.POSITIVE) ? accPos : accNeg;
  63. double tempPos = wordProbability.get(string).get(sentiment)/ isPosAcc;
  64. wordProbability.get(string).replace(sentiment,Math.log(tempPos));
  65. }
  66. }
  67. return wordProbability;
  68. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement