Advertisement
Guest User

Untitled

a guest
Feb 19th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.24 KB | None | 0 0
  1. /**
  2. * Method to create the emission and the transmission map
  3. * @param emission
  4. * @param transmission
  5. * @param wordarraylist
  6. * @throws IOException
  7. */
  8. public void emission_and_transmission (Map<String, Map<String, Double>> emission, Map<String, Map<String, Double>> transmission, List<String[]> wordarraylist) throws IOException{
  9. try {
  10. BufferedReader words_train_input = new BufferedReader(new FileReader(inputs_sentences)); //buffered reader for reading in file
  11. BufferedReader tags_train_input = new BufferedReader(new FileReader(inputs_tags)); //buffered reader for reading in file
  12. String wordlines, tagslines;
  13. Map<String, Double> tagmap = new HashMap<String, Double>(); // Maps each tag with its frequency
  14. List<String[]> tagarraylist = new ArrayList<String[]>();
  15.  
  16. while ((tagslines = tags_train_input.readLine()) != null) { //reading in tags line by line
  17. String [] tagarray = tagslines.split(" ");
  18. tagarraylist.add(tagarray); //adding tagarray to a list of string arrays
  19. }
  20.  
  21. while ((wordlines = words_train_input.readLine()) != null) { //reading in wordsline by line
  22. String [] wordarray = wordlines.split(" ");
  23. wordarraylist.add(wordarray); //adding tagarray to a list of string arrays
  24. }
  25.  
  26. for (int i=0; i<wordarraylist.size(); i++) { //looping over entire string array
  27. for (int j=0; j<wordarraylist.get(i).length; j++) { //looping over every element of the string array except
  28. if (!tagarraylist.get(i)[j].equals(".")){ //if tag isn't a period
  29. if (!emission.containsKey(tagarraylist.get(i)[j])) { //if emission map doesn't contain the tag
  30.  
  31. emission.put(tagarraylist.get(i)[j], new HashMap<String, Double>()); //put it in with an empty map
  32.  
  33. }
  34. if (!emission.get(tagarraylist.get(i)[j]).containsKey(wordarraylist.get(i)[j])) { //if the value map doesn't contain the word
  35. emission.get(tagarraylist.get(i)[j]).put(wordarraylist.get(i)[j], 1.0); //put the word with frequency of 1
  36. }
  37. else { //if it does contain the word with the correct tag, increment the frequency by 1
  38. emission.get(tagarraylist.get(i)[j]).put(wordarraylist.get(i)[j], emission.get(tagarraylist.get(i)[j]).get(wordarraylist.get(i)[j]) + 1.0);
  39. }
  40. }
  41. }
  42. }
  43.  
  44.  
  45. Map<String, Double> totalmap = new HashMap<String, Double>();
  46.  
  47. for(String tag: emission.keySet()) { // Changing frequency to ln(frequency/total)
  48. Double total = 0.0;
  49. for(Map.Entry<String, Double> e: emission.get(tag).entrySet()){
  50. total += e.getValue();
  51. }
  52. totalmap.put(tag, total);
  53. for(Map.Entry<String, Double> e: emission.get(tag).entrySet()){
  54. e.setValue(Math.log(e.getValue()/total));
  55. }
  56. }
  57.  
  58. words_train_input.close();
  59. tags_train_input.close();
  60.  
  61. //transmission
  62. for (String tag: emission.keySet()) { //put every tag from emission into key of transmission
  63. transmission.put(tag, new HashMap<String, Double>());
  64. }
  65.  
  66. transmission.put("#", new HashMap<String, Double>()); //hardcoding for transmission ("#")
  67.  
  68. for (int i=0; i<wordarraylist.size(); i++) { //looping over entire string array
  69. if (!transmission.get("#").containsKey(tagarraylist.get(i)[0])) {
  70. transmission.get("#").put(tagarraylist.get(i)[0], 1.0);
  71. }
  72. else {
  73. transmission.get("#").put(tagarraylist.get(i)[0], transmission.get("#").get(tagarraylist.get(i)[0]) +1.0);
  74. }
  75. for (int j=0; j<wordarraylist.get(i).length-1; j++) { //looping over every element of the string array
  76. if (!transmission.get(tagarraylist.get(i)[j]).containsKey(tagarraylist.get(i)[j+1])) {
  77. transmission.get(tagarraylist.get(i)[j]).put(tagarraylist.get(i)[j+1], 1.0);
  78. }
  79. else {
  80. transmission.get(tagarraylist.get(i)[j]).put(tagarraylist.get(i)[j+1], transmission.get(tagarraylist.get(i)[j]).get(tagarraylist.get(i)[j+1]) +1.0);
  81. }
  82. }
  83. }
  84.  
  85. for(String tag: transmission.keySet()) { // Changing frequency to ln(frequency/total)
  86. for(Map.Entry<String, Double> e: transmission.get(tag).entrySet()){
  87. if (tag.equals("#"))
  88. e.setValue(Math.log(e.getValue()/wordarraylist.size()));
  89. else
  90. e.setValue(Math.log(e.getValue()/totalmap.get(tag)));
  91. }
  92. }
  93. }
  94. catch(IOException e) {
  95. System.out.println("File does not exist!");
  96. }
  97. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement