Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.FileWriter;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Random;
- import java.util.Scanner;
- import java.util.Set;
- public class text_generator_mk2 {
- private ArrayList<Word2> training;
- private ArrayList<Word2> firstWords;
- private static final int WORD_CAP=150;
- public text_generator_mk2() {
- training = new ArrayList<Word2>();
- firstWords = new ArrayList<Word2>();
- }
- public void train(File text){
- PartOfSpeechLabel posl = new PartOfSpeechLabel("partsofspeech/91K nouns.txt","partsofspeech/31K verbs.txt",
- "partsofspeech/28K adjectives.txt","partsofspeech/6K adverbs.txt",
- "partsofspeech/3 articles.txt","partsofspeech/150 prepositions.txt",
- "partsofspeech/46 conjuctions.txt","partsofspeech/77 pronouns.txt");
- ArrayList<Word2> a = new ArrayList<Word2>();
- try {
- Scanner scanner = new Scanner(text);
- if (scanner.hasNext()) {
- String word = scanner.next();
- Word2 w = new Word2(word);
- a.add(w);
- firstWords.add(w);
- }
- while (scanner.hasNext()) {
- Word2 w2 = new Word2(scanner.next());
- a.add(w2);
- //System.out.println(w2.toString()+ " " + isEndOfSentence(w2));
- if(isEndOfSentence(w2)){
- Sentence alpha = new Sentence(a);
- //System.out.println(alpha);
- posl.partOfSpeech(alpha);
- for(Word2 v: alpha.getSentence()){
- training.add(v);
- }
- a = new ArrayList<Word2>();
- }
- }
- training.add(new Word2("EOF"));
- scanner.close();
- }
- catch (FileNotFoundException ex) {
- ex.printStackTrace();
- }
- }
- public void generate() throws IOException {
- File file = new File("speech.txt");
- FileWriter fw = new FileWriter(file);
- BufferedWriter bff = new BufferedWriter(fw);
- Map<Word2, Set<Word2>> map = getMap();
- Map<Word2, ArrayList<Word2>> newMap = this.expandValues(map);
- Random r = new Random();
- int rand = r.nextInt(firstWords.size());
- Word2 word = firstWords.get(rand);
- bff.write(word + " ");
- int loc = rand;
- int count = 1;
- ArrayList<Word2> temp = new ArrayList<Word2>();
- while (hasNextWord(newMap,word) && !word.equals(new Word2("EOF"))&&count<WORD_CAP) {
- partsofspeech pos;
- if(training.get(loc+1).getPos().size()==0){
- pos = partsofspeech.UNKNOWN;
- }
- else{
- pos = training.get(loc+1).getPos().get(0);
- }
- if(pos.equals(partsofspeech.UNKNOWN)||pos.equals(partsofspeech.ARTICLE)||pos.equals(partsofspeech.CONJUNCTION)){
- word = training.get(loc+1);
- }
- else{
- word = this.getNextWord(newMap,word,r,pos);
- }
- temp.add(word);
- if(isEndOfSentence(word)){
- Sentence s = new Sentence(temp);
- if(s.isProbableSentence()) bff.write(s + "\r\n");
- else{
- count-=temp.size();
- }
- temp = new ArrayList<Word2>();
- }
- //System.out.println(word);
- loc++;
- count++;
- }
- bff.close();
- }
- private Word2 getNextWord(Map<Word2, ArrayList<Word2>> newMap, Word2 word,Random r,partsofspeech s) {
- ArrayList<Word2> list = newMap.get(word);
- //System.out.println(list.size());
- ArrayList<Word2> edit = removeNon(list,s);
- //System.out.println(edit.size());
- return edit.get(r.nextInt(edit.size()));
- }
- private boolean hasNextWord(Map<Word2, ArrayList<Word2>> newMap, Word2 word) {
- return newMap.containsKey(word) && newMap.get(word).size() > 0;
- }
- private Map<Word2, Set<Word2>> getMap() {
- HashMap<Word2,Set<Word2>> map = new HashMap<Word2,Set<Word2>>();
- for (int k = 0; k < training.size()-1; k++) {
- if (!map.containsKey(training.get(k))) {
- HashSet<Word2> set = new HashSet<Word2>();
- set.add(training.get(k+1));
- map.put(training.get(k), set);
- }
- else {
- if (!map.get(training.get(k)).contains(training.get(k+1))) {
- map.get(training.get(k)).add(training.get(k+1));
- }
- else {
- Iterator<Word2> it = map.get(training.get(k)).iterator();
- while (it.hasNext()) {
- Word2 w = it.next();
- if (w.equals(training.get(k+1))) {
- w.increment();
- break;
- }
- }
- }
- }
- }
- return map;
- }
- private Map<Word2, ArrayList<Word2>> expandValues(Map<Word2, Set<Word2>> map) {
- Map<Word2,ArrayList<Word2>> m = new HashMap<Word2,ArrayList<Word2>>();
- for (Word2 s: map.keySet()) {
- ArrayList<Word2> list = new ArrayList<Word2>();
- for (Word2 w: map.get(s)) {
- int count = 0;
- while (count < w.getCount()) {
- list.add(w);
- count++;
- }
- }
- m.put(s,list);
- }
- return m;
- }
- private boolean isEndOfSentence(Word2 s){
- if(s.getWord().contains("."))return true;
- return false;
- }
- private ArrayList<Word2> removeNon(ArrayList<Word2> w, partsofspeech p){
- ArrayList<Word2> toReturn = new ArrayList<Word2>();
- for(Word2 v : w){
- boolean yes = false;
- for(partsofspeech i : v.getPos()){
- if(i.equals(p)) yes = true;
- }
- if(yes) toReturn.add(v);
- }
- if(toReturn.size()==0){
- return w;
- }
- return toReturn;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement