Advertisement
Guest User

Markov Chain generator

a guest
Mar 22nd, 2017
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 10.79 KB | None | 0 0
  1. package markov;
  2.  
  3. import java.io.File;
  4. import java.io.IOException;
  5. import java.util.ArrayList;
  6. import java.util.Arrays;
  7. import java.util.Collections;
  8. import java.util.HashMap;
  9. import java.util.Random;
  10. import java.util.Scanner;
  11.  
  12. /**
  13.  * A Class for generating Markov sentence chains from an input locus. Markov
  14.  * chains are a relational probabilistic data sets, and in this case are used
  15.  * to generate sentences from an input chain. This class is taken from a small
  16.  * project that originally powered a twitter bot.
  17.  *
  18.  * @author Mason C***** <Mason.C***** at s*****.edu>
  19.  */
  20. public class Markov {
  21.  
  22.     /**
  23.      * The HashMap containing the Markov chain itself. Being relational, the
  24.      * chain contains data for preceding words and for a single word following.
  25.      *
  26.      * Ex) With a prefix size of 2, a chain might look as such:
  27.      * The Quick | Brown
  28.      * Quick Brown | Fox
  29.      * The Quick | gunslinger
  30.      *
  31.      * If the chain generates "The Quick" at any point, there will be a 50/50
  32.      * percent chance of it outputting "Brown" or "gunslinger" next.
  33.      */
  34.     private final HashMap<String, ArrayList<String>> markovChain;
  35.     private final Random rnd;
  36.     private int prefixLength = 20;
  37.    
  38.     public static void main(String[] args) throws IOException {
  39.         // Create the first two entries (k:_start, k:_end)
  40.         Scanner in = new Scanner(System.in);
  41.         int prefixSize;
  42.  
  43.         //Input handling
  44.         while (true) {
  45.             try {
  46.                 System.out.print("Enter prefix size: ");
  47.                 //prefixSize = in.nextInt();
  48.                 prefixSize = Integer.parseInt(in.nextLine());
  49.                 break;
  50.             } catch (NumberFormatException e) {
  51.                 System.out.println("Invalid input. Must be an integer.");
  52.             }
  53.         }
  54.  
  55.         int sentences;
  56.         while (true) {
  57.             try {
  58.                 System.out.print("Enter number of sentences to generate: ");
  59.                 sentences = Integer.parseInt(in.nextLine());
  60.                 break;
  61.             } catch (NumberFormatException e) {
  62.                 System.out.println("Invalid input. Must be an integer.");
  63.             }
  64.         }
  65.         Markov markov = new Markov(prefixSize);
  66.  
  67.         while (true) {
  68.             /**
  69.              * Get words. The larger the input locus, the better! A prefix size
  70.              * of 1 should have a locus of a couple sentences, a size of 2 needs
  71.              * several pages of text, and 3 requires the entire text of Moby
  72.              * Dick to not seem deterministic.
  73.              *
  74.              */
  75.  
  76.             Scanner fileIn;
  77.             while (true) {
  78.                 try {
  79.                     System.out.print("Enter the file to read: ");
  80.                     File file = new File(in.nextLine());
  81.                     fileIn = new Scanner(file, "UTF-8");
  82.                     break;
  83.                 } catch (IOException e) {
  84.                     System.out.println("File could not be read: " + e.getMessage());
  85.                 }
  86.             }
  87.  
  88.             //pun unintentional
  89.             StringBuilder bodyBuilder = new StringBuilder();
  90.             //Read file to end
  91.             while (fileIn.hasNext()) {
  92.                 bodyBuilder.append(fileIn.nextLine()).append(" ");
  93.             }
  94.             markov.addWords(bodyBuilder.toString());
  95.             markov.generateSentence(sentences);
  96.         }
  97.     }
  98.  
  99.     public Markov(int prefixLength) {
  100.         rnd = new Random();
  101.         this.prefixLength = prefixLength;
  102.  
  103.         //Instantiate the HashMap and prep it for generation
  104.         markovChain = new HashMap();
  105.         //Special cases for beginnings and ends of sentences
  106.         markovChain.put("_start", new ArrayList());
  107.         markovChain.put("_end", new ArrayList());
  108.     }
  109.  
  110.     public synchronized void addWords(String words) {
  111.         //C:\Users\mrsma\OneDrive\Documents\sherlock holmes.txt
  112.         String[] sentences = words.split("(?<=[.\\?\\!])");
  113.         System.out.println("Adding " + sentences.length + " sentences to the chain and generating output...");
  114.         for (int i = 0; i < sentences.length; i++) {
  115.             String sentence = sentences[i];
  116.             addSentence(sentence.trim());
  117.         }
  118.     }
  119.  
  120.     /**
  121.      * Add a sentence to the Markov Chain. Loops through every word in the
  122.      * sentence, and adds to the Markov chain based on the length of
  123.      * prefixLength
  124.      *
  125.      * @param sentence the sentence to add do the Markov chain
  126.      */
  127.     public synchronized void addSentence(String sentence) {
  128.         String[] words = sentence.split(" ");
  129.  
  130.         //Early return for empty strings
  131.         if (words.length < prefixLength) {
  132.             return;
  133.         }
  134.  
  135.         //The prefix being added to the Markov Chain
  136.         String prefix;
  137.         StringBuilder prefixBuilder = new StringBuilder();
  138.  
  139.         for (int i = 0; i < words.length - prefixLength; i++) {
  140.              prefixBuilder.setLength(0);
  141.             for (int u = 0; u < prefixLength; u++) {
  142.                 //Add words to prefix
  143.                 prefixBuilder.append(words[i + u]).append(" ");
  144.             }
  145.             prefix = prefixBuilder.toString();
  146.             //Skip empty prefixes
  147.             if (prefix.isEmpty() || prefix.equals(" ")) {
  148.                 continue;
  149.             }
  150.             //Normalize the prefix to remove punctuation
  151.             String trimmed = prefix.replaceAll("[^A-Za-z0-9 ]", "").trim();
  152.  
  153.             //continue if the prefix is invalid
  154.             if (trimmed == null
  155.                     || trimmed.isEmpty()
  156.                     || trimmed.split(" ").length != prefixLength) {
  157.                 continue;
  158.             }
  159.  
  160.             if (i == 0) {
  161.                 //Words in the _start section begin sentences
  162.                 ArrayList<String> startWords = markovChain.get("_start");
  163.                 startWords.add(prefix.trim());
  164.             }
  165.  
  166.             //Adds the next word to the suffix pool at the given prefix
  167.             ArrayList<String> suffix = markovChain.get(prefix);
  168.  
  169.             //If the suffix list is not yet instantiated for this entry, create a new one
  170.             if (suffix == null) {
  171.                 suffix = new ArrayList();
  172.             }
  173.  
  174.             //Add to the suffix and then add it to the chain
  175.             suffix.add(words[i + prefixLength].trim());
  176.             markovChain.put(prefix.trim().toLowerCase(), suffix);
  177.         }
  178.         //Words in the _end category end sentences
  179.         ArrayList<String> endWords = markovChain.get("_end");
  180.         endWords.add(words[words.length - 1].trim());
  181.        
  182.     }
  183.  
  184.     /**
  185.      * Generate a phrase using the Markov chain with given sentence length
  186.      *
  187.      * @param sentences how many sentences to generate
  188.      */
  189.     public synchronized void generateSentence(int sentences) {
  190.  
  191.         //Early return if chain is built incorrectly
  192.         if (markovChain.size() < 1) {
  193.             return;
  194.         }
  195.  
  196.         //ArrayList containing constructed return phrase
  197.         ArrayList<String> newPhrase = new ArrayList();
  198.  
  199.         //All possible sentence beginnings from the locus. If none, terminate.
  200.         ArrayList<String> startWords = markovChain.get("_start");
  201.         int startWordsLen = startWords.size();
  202.  
  203.         ArrayList<String> endWords = markovChain.get("_end");
  204.         int endWordsLen = endWords.size();
  205.  
  206.         if (startWordsLen < 1 || endWordsLen < 0) {
  207.             return;
  208.         }
  209.  
  210.         //Grabs random sentance start
  211.         String nextWord = startWords.get(rnd.nextInt(startWordsLen));
  212.  
  213.         newPhrase.add(nextWord);
  214.         ArrayList<String> previousWords = new ArrayList(Arrays.asList(nextWord.split(" ")));
  215.         Collections.reverse(previousWords);
  216.         previousWords.ensureCapacity(prefixLength);
  217.         String prefix;
  218.         //Cache for attempted sentence beginnings.
  219.         String addPrevious = null;
  220.         StringBuilder prefixBuilder = new StringBuilder();
  221.        
  222.         // Keep looping through the words until all sentences are constructed
  223.         for (int i = 0; i < sentences;) {
  224.             prefixBuilder.setLength(0);
  225.             //Cut down words list to be same length as prefix size
  226.             previousWords = new ArrayList(previousWords.subList(0, prefixLength));
  227.  
  228.             //Build a prefix
  229.             for (int u = previousWords.size() - 1; u >= 0; u--) {
  230.                 prefixBuilder.append(previousWords.get(u)).append(" ");
  231.             }
  232.             prefix = prefixBuilder.toString().trim();
  233.  
  234.             ArrayList<String> wordSelection = markovChain.get(prefix.toLowerCase());
  235.             //If there is no text within the prefix, then get rid of the sentence
  236.             //beginning and start a new sentence.
  237.             if (wordSelection == null || wordSelection.isEmpty()) {
  238.                 //Remove the prefix from startWords and markovChain, if they exist there
  239.                 startWords.remove(prefix);
  240.                 markovChain.remove(prefix.toLowerCase());
  241.                 //Get a random new next word set to begin a new sentence
  242.                 nextWord = startWords.get(rnd.nextInt(startWordsLen));
  243.                 previousWords = new ArrayList(Arrays.asList(nextWord.split(" ")));
  244.                 //Reverse the set to match the insertion order
  245.                 Collections.reverse(previousWords);
  246.                 //Cache previous words
  247.                 addPrevious = nextWord;
  248.                 continue;
  249.             }else{
  250.                 //If sentence beginning is valid and there was a sentence reattempt,
  251.                 //carry the cached beginning into the phrase.
  252.                 if(addPrevious != null){
  253.                     newPhrase.add(addPrevious);
  254.                     addPrevious = null;
  255.                 }
  256.             }
  257.            
  258.             //Randomly select a word from the prefix's pool. More common words will
  259.             //appear multiple times, making them more probable.
  260.             int wordSelectionLen = wordSelection.size();
  261.             nextWord = wordSelection.get(rnd.nextInt(wordSelectionLen));
  262.             previousWords.add(0, nextWord);
  263.  
  264.             int length = nextWord.length();
  265.             if (length > 0) {
  266.                 newPhrase.add(nextWord);
  267.                 //Did the sentence terminate? If so, incriment
  268.                 if (nextWord.matches("(.*?)[\\.\\?\\!](.*?)")) {
  269.                     i++;
  270.                 }
  271.             }
  272.            
  273.         }
  274.         StringBuilder outputBuilder = new StringBuilder();
  275.         for (int i = 0; i < newPhrase.size(); i++) {
  276.             String word = newPhrase.get(i);
  277.             outputBuilder.append(word).append(" ");
  278.             if (i % 20 == 0 && i != 0) {
  279.                 outputBuilder.append("\n");
  280.             }
  281.         }
  282.         System.out.println(outputBuilder.toString());
  283.     }
  284.     public static String output;
  285.  
  286. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement