tamsenmckerley

markov chain

Apr 6th, 2018
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 6.39 KB | None | 0 0
  1. import java.util.*;
  2. import java.io.*;
  3.  
  /**
   * Tiny first-order Markov chain sentence generator.
   *
   * <p>The input text is lower-cased and split into sentences on periods and into
   * words on whitespace. A transition table counts how often each word is directly
   * followed by each other word inside a sentence; a new sentence is then produced
   * by a weighted random walk through that table.
   *
   * <p>The transition table is a square 2D list of strings: the top row and the
   * left column both hold the vocabulary (in first-seen order), the top-left corner
   * is the placeholder {@code " "}, and every other cell holds the decimal count of
   * "row word followed by column word".
   */
  public class Driver {
      /** Range of the die rolled when the walk reaches a word that has ended a sentence. */
      private static final int END_ROLL_RANGE = 10;

      /** Number of die faces that stop the walk; raise it for shorter sentences. */
      private static final int END_ROLL_HITS = 3;

      /**
       * Reports whether a list of strings contains a specific string.
       *
       * @param a list to search
       * @param x string to look for
       * @return {@code true} if some element of {@code a} equals {@code x}
       */
      public static boolean contains (ArrayList<String> a, String x) {
          for (String candidate : a) {
              if (candidate.equals(x)) {
                  return true;
              }
          }
          return false;
      }

      /**
       * Finds the index of a string in the top row of a 2D list of strings.
       *
       * @param m table whose first row is searched
       * @param s string to look for
       * @return column index of the first match, or -1 if there is none
       */
      public static int indexinhoriz (ArrayList<ArrayList<String>> m, String s) {
          ArrayList<String> topRow = m.get(0);
          for (int i = 0; i < topRow.size(); i++) {
              if (topRow.get(i).equals(s)) {
                  return i;
              }
          }
          return -1;
      }

      /**
       * Finds the index of a string in the left column of a 2D list of strings.
       *
       * @param m table whose first column is searched
       * @param s string to look for
       * @return row index of the first match, or -1 if there is none
       */
      public static int indexinvert (ArrayList<ArrayList<String>> m, String s) {
          for (int i = 0; i < m.size(); i++) {
              if (m.get(i).get(0).equals(s)) {
                  return i;
              }
          }
          return -1;
      }

      /**
       * Lower-cases the text and splits it into sentences (on periods) and words
       * (on runs of whitespace). Blank sentences are dropped and surrounding
       * whitespace is ignored, so a space after a period no longer yields an
       * empty "word".
       *
       * @param text raw input; periods are the only sentence separator recognised
       * @return one word array per non-blank sentence, in input order
       */
      static ArrayList<String[]> splitSentences (String text) {
          ArrayList<String[]> sentences = new ArrayList<String[]>();
          // Locale.ROOT keeps the casing independent of the machine's default locale.
          for (String raw : text.toLowerCase(Locale.ROOT).split("\\.")) {
              String trimmed = raw.trim();
              if (!trimmed.isEmpty()) {
                  sentences.add(trimmed.split("\\s+"));
              }
          }
          return sentences;
      }

      /**
       * Builds the transition table described in the class comment.
       *
       * @param sentences word arrays, one per sentence
       * @return the table; cell [row of w1][column of w2] is the number of times
       *         w2 directly follows w1 within a sentence
       */
      static ArrayList<ArrayList<String>> buildMarkov (List<String[]> sentences) {
          // LinkedHashSet removes duplicates while keeping first-seen order.
          LinkedHashSet<String> vocabulary = new LinkedHashSet<String>();
          for (String[] sentence : sentences) {
              for (String w : sentence) {
                  vocabulary.add(w);
              }
          }

          ArrayList<String> header = new ArrayList<String>(vocabulary.size() + 1);
          header.add(" "); // so that the top left corner of the table is blank
          header.addAll(vocabulary);

          ArrayList<ArrayList<String>> markov = new ArrayList<ArrayList<String>>();
          markov.add(header);
          for (int i = 1; i < header.size(); i++) {
              ArrayList<String> row = new ArrayList<String>(header.size());
              row.add(header.get(i)); // left column mirrors the top row
              for (int j = 1; j < header.size(); j++) {
                  row.add("0"); // frequencies start out at 0
              }
              markov.add(row);
          }

          for (String[] sentence : sentences) {
              // Stops one short of the end: the last word of a sentence has no successor.
              for (int c = 0; c + 1 < sentence.length; c++) {
                  ArrayList<String> row = markov.get(indexinvert(markov, sentence[c]));
                  int col = indexinhoriz(markov, sentence[c + 1]);
                  // Read and write the SAME cell (row = current word, column = next word).
                  int count = Integer.parseInt(row.get(col)) + 1;
                  row.set(col, Integer.toString(count));
              }
          }
          return markov;
      }

      /**
       * Generates one sentence by a weighted random walk over the table.
       *
       * <p>The walk starts at a random sentence-starting word. After each word is
       * emitted, the walk stops with probability END_ROLL_HITS / END_ROLL_RANGE if
       * that word has ended a sentence in the input, and always stops if the word
       * has no recorded successor. Otherwise the next word is drawn with probability
       * proportional to its count in the current word's row.
       *
       * @param markov table produced by {@link #buildMarkov}
       * @param swords words that start sentences
       * @param ewords words that end sentences
       * @param rng    source of randomness
       * @return the words separated by single spaces and terminated by a period,
       *         or the empty string if {@code swords} is empty
       */
      static String generateSentence (ArrayList<ArrayList<String>> markov, ArrayList<String> swords,
              ArrayList<String> ewords, Random rng) {
          if (swords.isEmpty()) {
              return "";
          }
          StringBuilder sentence = new StringBuilder();
          String word = swords.get(rng.nextInt(swords.size())); // picks starting word
          while (true) {
              sentence.append(word);
              if (contains(ewords, word) && rng.nextInt(END_ROLL_RANGE) < END_ROLL_HITS) {
                  break;
              }

              ArrayList<String> row = markov.get(indexinvert(markov, word));
              int total = 0;
              for (int i = 1; i < row.size(); i++) {
                  total += Integer.parseInt(row.get(i)); // adds up all frequencies
              }
              if (total == 0) {
                  break; // dead end: this word was never followed by anything
              }

              // Pick a value in [1, total] and walk the row until the running sum reaches it.
              int target = rng.nextInt(total) + 1;
              int col = 0;
              int running = 0;
              while (running < target && col < row.size() - 1) {
                  col++;
                  running += Integer.parseInt(row.get(col));
              }
              word = markov.get(0).get(col);
              sentence.append(' ');
          }
          return sentence.append('.').toString();
      }

      /**
       * Builds the chain from a built-in sample text and prints one generated sentence.
       *
       * @param args unused
       * @throws IOException never thrown by the current implementation; kept so the
       *         signature stays unchanged
       */
      public static void main (String []args) throws IOException {
          //example input:
          String text = "According to all known laws of aviation there is no way a bee should be able to fly.Its wings are too small to get its fat little body off the ground.The bee of course flies anyway because bees don't care what humans think is impossible";
          //the only punctuation allowed in the input string is periods (and apostrophes within words).

          ArrayList<String[]> words = splitSentences(text);
          ArrayList<String> swords = new ArrayList<String>(); //words that start sentences
          ArrayList<String> ewords = new ArrayList<String>(); //words that end sentences
          for (String[] sentence : words) {
              swords.add(sentence[0]);
              ewords.add(sentence[sentence.length - 1]);
          }

          ArrayList<ArrayList<String>> markov = buildMarkov(words);
          System.out.println(generateSentence(markov, swords, ewords, new Random()));
      }
  }
Add Comment
Please, Sign In to add comment