Advertisement
Guest User

Untitled

a guest
Mar 2nd, 2015
227
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.81 KB | None | 0 0
  1. package glasgow.teamproject.teamB.TwitIE;
  2.  
  3. import gate.Annotation;
  4. import gate.AnnotationSet;
  5. import gate.Corpus;
  6. import gate.CorpusController;
  7. import gate.Document;
  8. import gate.Factory;
  9. import gate.Gate;
  10. import gate.creole.ExecutionException;
  11. import gate.creole.ResourceInstantiationException;
  12. import gate.persist.PersistenceException;
  13. import gate.util.GateException;
  14. import gate.util.persistence.PersistenceManager;
  15. import glasgow.teamproject.teamB.Util.ProjectProperties;
  16.  
  17. import java.io.File;
  18. import java.io.IOException;
  19. import java.util.ArrayList;
  20. import java.util.HashMap;
  21. import java.util.HashSet;
  22. import java.util.Iterator;
  23.  
  24. import javax.annotation.PostConstruct;
  25.  
  26. import org.json.JSONObject;
  27. import org.springframework.stereotype.Component;
  28.  
  29. @Component
  30. public class TwitIE {
  31. private static HashSet<String> interestedNE = new HashSet<String>();
  32. public final HashSet<String> defaultNE = ProjectProperties.defaultNE;
  33. private boolean accept = false;
  34. private static String currentDir;
  35. private static String pathToApplication;
  36.  
  37. public void addNE(String s) {
  38. interestedNE.add(s);
  39. }
  40.  
  41. // public TwitIE() {
  42. // // TODO Auto-generated constructor stub
  43. // init();
  44. // }
  45.  
  46. @PostConstruct
  47. public void init() {
  48. System.out.println("twitie init");
  49. try {
  50. if (interestedNE.isEmpty()) {
  51. interestedNE.addAll(defaultNE);
  52. }
  53. System.out.println("added NE");
  54.  
  55. // Only god knows how it works
  56. currentDir = getClass().getProtectionDomain().getCodeSource().getLocation().toString();
  57.  
  58. //String currentDir = "/Users/velin/Documents/Workspaces/3_Year/TP3/";
  59. currentDir = currentDir.replace("file:", "").split("\\.")[0] + "TeamBravo";
  60. System.out.println(currentDir);
  61.  
  62. File f = new File(currentDir);
  63. Gate.setGateHome(f);
  64. System.out.println("before gate.init");
  65. Gate.init();
  66. System.out.println("after gate.init");
  67. pathToApplication = currentDir + "/" + "applicationState.xgapp";
  68.  
  69. accept = true;
  70. } catch (GateException e) {
  71. // TODO Auto-generated catch block
  72. e.printStackTrace();
  73. }
  74. //System.out.println("twirie init END");
  75. }
  76.  
  77. public HashMap<String, ArrayList<String>> getNamedEntites(String tweet) {
  78. accept = false;
  79. //System.out.println("getNamedEntities: " + tweet);
  80. JSONObject ob = null;
  81. HashMap<String, ArrayList<String>> NEs = null;
  82. try {
  83. //System.out.println("before JSON");
  84. ob = new JSONObject(tweet);
  85. //System.out.println("after JSON");
  86. //System.out.println("JSON: " + ob);
  87.  
  88. //System.out.println("trying");
  89. NEs = this.processString((String) ob.getString("text"));
  90. //System.out.println("tried");
  91.  
  92. } catch (Exception e) {
  93. // TODO: handle exception
  94. e.printStackTrace();
  95. System.err.println("Something wrong processing the string");
  96. }
  97.  
  98. //System.out.println("END getNamedEntitties");
  99. accept = true;
  100. return NEs;
  101. }
  102.  
  103. public synchronized HashMap<String, ArrayList<String>> processString(String s) throws InterruptedException {
  104. //System.out.println("process String");
  105. Corpus corpus;
  106. CorpusController pipeline;
  107. HashMap<String, ArrayList<String>> NEs = new HashMap<String, ArrayList<String>>();
  108. if (s.isEmpty())
  109. return null;
  110. Document doc = null;
  111.  
  112. try {
  113. pipeline = (CorpusController) PersistenceManager.loadObjectFromFile(new File(pathToApplication));
  114. corpus = Factory.newCorpus("Tweet corpus");
  115. doc = Factory.newDocument(s);
  116. corpus.add(doc);
  117. pipeline.setCorpus(corpus);
  118. pipeline.execute();
  119. } catch (ResourceInstantiationException e) {
  120. // TODO Auto-generated catch block
  121. e.printStackTrace();
  122. } catch (ExecutionException e) {
  123. // That usually happens with cyrillic sentences or tweets containing only other (weird) characters, like one char, etc.
  124. System.out.println("GATE was unable to execute this tweet:");
  125. System.out.println(s);
  126. // TODO Auto-generated catch block
  127. //e.printStackTrace();
  128. } catch (PersistenceException e) {
  129. // TODO Auto-generated catch block
  130. e.printStackTrace();
  131. } catch (IOException e) {
  132. // TODO Auto-generated catch block
  133. e.printStackTrace();
  134. }
  135.  
  136. if (doc != null) {
  137.  
  138. AnnotationSet annotations = doc.getAnnotations();
  139. for (String namedEntity : interestedNE) {
  140. NEs.put(namedEntity, new ArrayList<String>());
  141. }
  142. Iterator<Annotation> itr = annotations.iterator();
  143. while (itr.hasNext()) {
  144. Annotation a = itr.next();
  145. if (!interestedNE.contains(a.getType())) {
  146. continue;
  147. }
  148. ArrayList<String> NEsArray = NEs.get(a.getType());
  149. NEsArray.add(s.substring(a.getStartNode().getOffset().intValue(), a.getEndNode().getOffset().intValue()));
  150. NEs.put(a.getType(), NEsArray);
  151. }
  152. }
  153. // System.out.println("process String END");
  154. return NEs;
  155. }
  156.  
  157. public boolean accept() {
  158. // TODO Auto-generated method stub
  159. return accept;
  160. }
  161.  
  162. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement