// Untitled

package corenlp.process;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;
import statistics.RAWF;

/**
 * Re-parses token/POS/entity corpora with a Stanford neural dependency parser
 * and writes the result as tab-separated, CoNLL-X style lines.
 *
 * <p>Each output line has eleven columns: one-based token id, word, {@code _},
 * POS tag, POS tag (repeated), {@code _}, one-based head id, dependency label,
 * {@code _}, {@code _}, and the entity label copied from the input. Sentences
 * are separated by an empty line.
 *
 * <p>The parser model is loaded on the first conversion and reused by later
 * conversions on the same instance as long as {@link #modelPath} is unchanged.
 */
public class Converter {

	/** Marker for "take the last column of the line" in {@link #convert}. */
	private static final int LAST_COLUMN = -1;

	/** Path (file or classpath resource) of the dependency parser model. */
	String modelPath ;

	/** Lazily loaded parser; {@code null} until the first conversion. */
	private DependencyParser cachedParser;

	/** Value of {@link #modelPath} that {@link #cachedParser} was loaded from. */
	private String cachedParserPath;

	/**
	 * Creates a converter; the model itself is not loaded until it is needed.
	 *
	 * @param modelPath path of the dependency parser model to use
	 */
	public Converter(String modelPath) {
		this.modelPath = modelPath;
	}

	/**
	 * Reads a space-separated CoNLL-2003 style file and writes it with predicted
	 * dependencies. Column 0 is used as the word, column 1 as the POS tag and
	 * column 2 as the entity label.
	 *
	 * @param path input file; sentences are separated by blank lines
	 * @param writePath output file
	 * @throws IOException if reading or writing fails, or a token line has
	 *         fewer than three columns
	 */
	public void readData(String path, String writePath) throws IOException{
		convert(path, writePath, " ", 0, 1, 2);
	}

	/**
	 * Reads a tab-separated OntoNotes file (CoNLL-X layout) and writes it with
	 * predicted dependencies. Column 1 is used as the word, column 3 as the POS
	 * tag and the last column as the entity label.
	 *
	 * @param path input file; sentences are separated by blank lines
	 * @param writePath output file
	 * @throws IOException if reading or writing fails, or a token line has
	 *         fewer than four columns
	 */
	public void readOntoNotes(String path, String writePath) throws IOException{
		convert(path, writePath, "\t", 1, 3, LAST_COLUMN);
	}

	/**
	 * Shared conversion loop: collects the tokens of one sentence, parses the
	 * sentence when a blank line (or the end of the file) is reached, and
	 * writes it out.
	 *
	 * @param path input file
	 * @param writePath output file
	 * @param delimiter column separator, passed to {@link String#split(String)}
	 * @param wordColumn zero-based index of the word column
	 * @param tagColumn zero-based index of the POS tag column
	 * @param entityColumn zero-based index of the entity column, or
	 *        {@link #LAST_COLUMN} for the last column of each line
	 * @throws IOException if reading or writing fails, or a token line has too
	 *         few columns
	 */
	private void convert(String path, String writePath, String delimiter,
			int wordColumn, int tagColumn, int entityColumn) throws IOException {
		// Load the model before touching any file, as the original code did.
		DependencyParser parser = loadParser();
		int minColumns = Math.max(Math.max(wordColumn, tagColumn), entityColumn) + 1;

		// try-with-resources closes both streams even when parsing fails midway.
		try (BufferedReader br = RAWF.reader(path);
				PrintWriter pw = RAWF.writer(writePath)) {
			List<CoreLabel> words = new ArrayList<>();
			List<String> output = new ArrayList<>();
			String line;
			int lineNumber = 0;
			while ((line = br.readLine()) != null) {
				lineNumber++;
				// Whitespace-only lines are sentence boundaries too.
				if (line.trim().isEmpty()) {
					writeSentence(parser, words, output, pw);
					words = new ArrayList<>();
					output = new ArrayList<>();
					continue;
				}

				String[] values = line.split(delimiter);
				if (values.length < minColumns) {
					throw new IOException("Malformed line " + lineNumber + " in " + path
							+ ": expected at least " + minColumns + " columns but found "
							+ values.length + ": " + line);
				}
				int entityIndex = entityColumn == LAST_COLUMN ? values.length - 1 : entityColumn;
				output.add(values[entityIndex]);

				CoreLabel token = new CoreLabel();
				token.setWord(values[wordColumn]);
				token.setTag(values[tagColumn]);
				words.add(token);
			}
			// The file may end without a trailing blank line; do not lose that sentence.
			writeSentence(parser, words, output, pw);

			// PrintWriter never throws on write failures, so ask it explicitly.
			if (pw.checkError()) {
				throw new IOException("Failed to write " + writePath);
			}
		}
	}

	/**
	 * Parses one sentence and prints it followed by an empty line. Does nothing
	 * for an empty sentence (for example consecutive blank input lines).
	 *
	 * @param parser parser used to predict the dependencies
	 * @param words tokens of the sentence, with word and tag set
	 * @param output entity label of each token, parallel to {@code words}
	 * @param pw destination of the formatted lines
	 */
	private static void writeSentence(DependencyParser parser, List<CoreLabel> words,
			List<String> output, PrintWriter pw) {
		if (words.isEmpty()) {
			return;
		}

		GrammaticalStructure gs = parser.predict(words);
		// Head indices are stored zero-based here and turned back into
		// one-based ids when the line is printed below.
		int[] heads = new int[words.size()];
		String[] depLabels = new String[words.size()];
		for (TypedDependency dep : gs.typedDependencies()) {
			int tokenIndex = dep.dep().index() - 1;
			heads[tokenIndex] = dep.gov().index() - 1;
			depLabels[tokenIndex] = dep.reln().getShortName();
		}

		for (int p = 0; p < words.size(); p++) {
			CoreLabel word = words.get(p);
			int head = heads[p] + 1;
			pw.println((p+1) + "\t"+  word.word() + "\t_\t" +word.tag()+"\t"+word.tag()+"\t_\t"+head+"\t"+depLabels[p]+"\t_\t_\t"+output.get(p) );
		}
		pw.println();
	}

	/**
	 * Returns the parser for the current {@link #modelPath}, loading it only
	 * when no parser has been loaded yet or the path has changed since.
	 *
	 * @return the parser for the current model path
	 */
	private DependencyParser loadParser() {
		if (cachedParser == null || !this.modelPath.equals(cachedParserPath)) {
			cachedParser = DependencyParser.loadFromModelFile(this.modelPath);
			cachedParserPath = this.modelPath;
		}
		return cachedParser;
	}

	/**
	 * Converts the OntoNotes train/dev/test splits once with the "SD" model and
	 * once with the "UD" model.
	 *
	 * @param args ignored
	 * @throws IOException if any of the conversions fails
	 */
	public static void main(String... args) throws IOException {
		String[] x = new String[]{"SD", "UD"};
		for(String type : x) {
			String path = "edu/stanford/nlp/models/parser/nndep/english_"+type+".gz";
			Converter conv = new Converter(path);
			// To convert CoNLL-2003 instead, use readData, e.g.:
			//   conv.readData("data/conll2003/train.txt", "data/conll2003/train."+type.toLowerCase()+".conllx");
			//   conv.readData("data/conll2003/dev.txt", "data/conll2003/dev."+type.toLowerCase()+".conllx");
			//   conv.readData("data/conll2003/test.txt", "data/conll2003/test."+type.toLowerCase()+".conllx");

			conv.readOntoNotes("data/ontonotes/train.sd.conllx", "data/ontonotes/train.pred"+type.toLowerCase()+".conllx");
			conv.readOntoNotes("data/ontonotes/dev.sd.conllx", "data/ontonotes/dev.pred"+type.toLowerCase()+".conllx");
			conv.readOntoNotes("data/ontonotes/test.sd.conllx", "data/ontonotes/test.pred"+type.toLowerCase()+".conllx");
		}
	}
}