Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Method to create the emission and the transmission map
- * @param emission
- * @param transmission
- * @param wordarraylist
- * @throws IOException
- */
- public void emission_and_transmission (Map<String, Map<String, Double>> emission, Map<String, Map<String, Double>> transmission, List<String[]> wordarraylist) throws IOException{
- try {
- BufferedReader words_train_input = new BufferedReader(new FileReader(inputs_sentences)); //buffered reader for reading in file
- BufferedReader tags_train_input = new BufferedReader(new FileReader(inputs_tags)); //buffered reader for reading in file
- String wordlines, tagslines;
- Map<String, Double> tagmap = new HashMap<String, Double>(); // Maps each tag with its frequency
- List<String[]> tagarraylist = new ArrayList<String[]>();
- while ((tagslines = tags_train_input.readLine()) != null) { //reading in tags line by line
- String [] tagarray = tagslines.split(" ");
- tagarraylist.add(tagarray); //adding tagarray to a list of string arrays
- }
- while ((wordlines = words_train_input.readLine()) != null) { //reading in wordsline by line
- String [] wordarray = wordlines.split(" ");
- wordarraylist.add(wordarray); //adding tagarray to a list of string arrays
- }
- for (int i=0; i<wordarraylist.size(); i++) { //looping over entire string array
- for (int j=0; j<wordarraylist.get(i).length; j++) { //looping over every element of the string array except
- if (!tagarraylist.get(i)[j].equals(".")){ //if tag isn't a period
- if (!emission.containsKey(tagarraylist.get(i)[j])) { //if emission map doesn't contain the tag
- emission.put(tagarraylist.get(i)[j], new HashMap<String, Double>()); //put it in with an empty map
- }
- if (!emission.get(tagarraylist.get(i)[j]).containsKey(wordarraylist.get(i)[j])) { //if the value map doesn't contain the word
- emission.get(tagarraylist.get(i)[j]).put(wordarraylist.get(i)[j], 1.0); //put the word with frequency of 1
- }
- else { //if it does contain the word with the correct tag, increment the frequency by 1
- emission.get(tagarraylist.get(i)[j]).put(wordarraylist.get(i)[j], emission.get(tagarraylist.get(i)[j]).get(wordarraylist.get(i)[j]) + 1.0);
- }
- }
- }
- }
- Map<String, Double> totalmap = new HashMap<String, Double>();
- for(String tag: emission.keySet()) { // Changing frequency to ln(frequency/total)
- Double total = 0.0;
- for(Map.Entry<String, Double> e: emission.get(tag).entrySet()){
- total += e.getValue();
- }
- totalmap.put(tag, total);
- for(Map.Entry<String, Double> e: emission.get(tag).entrySet()){
- e.setValue(Math.log(e.getValue()/total));
- }
- }
- words_train_input.close();
- tags_train_input.close();
- //transmission
- for (String tag: emission.keySet()) { //put every tag from emission into key of transmission
- transmission.put(tag, new HashMap<String, Double>());
- }
- transmission.put("#", new HashMap<String, Double>()); //hardcoding for transmission ("#")
- for (int i=0; i<wordarraylist.size(); i++) { //looping over entire string array
- if (!transmission.get("#").containsKey(tagarraylist.get(i)[0])) {
- transmission.get("#").put(tagarraylist.get(i)[0], 1.0);
- }
- else {
- transmission.get("#").put(tagarraylist.get(i)[0], transmission.get("#").get(tagarraylist.get(i)[0]) +1.0);
- }
- for (int j=0; j<wordarraylist.get(i).length-1; j++) { //looping over every element of the string array
- if (!transmission.get(tagarraylist.get(i)[j]).containsKey(tagarraylist.get(i)[j+1])) {
- transmission.get(tagarraylist.get(i)[j]).put(tagarraylist.get(i)[j+1], 1.0);
- }
- else {
- transmission.get(tagarraylist.get(i)[j]).put(tagarraylist.get(i)[j+1], transmission.get(tagarraylist.get(i)[j]).get(tagarraylist.get(i)[j+1]) +1.0);
- }
- }
- }
- for(String tag: transmission.keySet()) { // Changing frequency to ln(frequency/total)
- for(Map.Entry<String, Double> e: transmission.get(tag).entrySet()){
- if (tag.equals("#"))
- e.setValue(Math.log(e.getValue()/wordarraylist.size()));
- else
- e.setValue(Math.log(e.getValue()/totalmap.get(tag)));
- }
- }
- }
- catch(IOException e) {
- System.out.println("File does not exist!");
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement