Advertisement
Guest User

Untitled

a guest
Dec 18th, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.67 KB | None | 0 0
  1. package ner;
  2.  
  3. import java.awt.FileDialog;
  4. import java.io.File;
  5. import java.io.FileInputStream;
  6. import java.io.FileOutputStream;
  7. import java.io.FileWriter;
  8. import java.io.IOException;
  9. import java.io.InputStream;
  10. import java.io.OutputStream;
  11. import java.io.UnsupportedEncodingException;
  12. import java.nio.file.Files;
  13. import java.nio.file.Paths;
  14.  
  15. import javax.swing.JFrame;
  16.  
  17. import org.apache.jena.query.ParameterizedSparqlString;
  18. import org.apache.jena.query.QueryExecution;
  19. import org.apache.jena.query.QueryExecutionFactory;
  20. import org.apache.jena.query.ResultSet;
  21. import org.apache.jena.query.ResultSetFactory;
  22. import org.apache.jena.rdf.model.Literal;
  23. import org.apache.jena.rdf.model.Model;
  24. import org.apache.jena.rdf.model.ModelFactory;
  25. import org.apache.jena.rdf.model.Property;
  26. import org.apache.jena.rdf.model.RDFWriter;
  27. import org.apache.jena.rdf.model.Resource;
  28. import org.apache.jena.rdf.model.ResourceFactory;
  29. import org.apache.jena.rdf.model.Statement;
  30.  
  31. import javafx.application.Application;
  32. import javafx.beans.property.SimpleStringProperty;
  33. import javafx.collections.FXCollections;
  34. import javafx.collections.ObservableList;
  35. import javafx.event.ActionEvent;
  36. import javafx.event.EventHandler;
  37. import javafx.geometry.Insets;
  38. import javafx.scene.Group;
  39. import javafx.scene.Scene;
  40. import javafx.scene.control.Button;
  41. import javafx.scene.control.Label;
  42. import javafx.scene.control.TableColumn;
  43. import javafx.scene.control.TableColumn.CellEditEvent;
  44. import javafx.scene.control.TableView;
  45. import javafx.scene.control.TextField;
  46. import javafx.scene.control.cell.PropertyValueFactory;
  47. import javafx.scene.control.cell.TextFieldTableCell;
  48. import javafx.scene.layout.HBox;
  49. import javafx.scene.layout.VBox;
  50. import javafx.scene.text.Font;
  51. import javafx.stage.Stage;
  52. import opennlp.tools.namefind.NameFinderME;
  53. import opennlp.tools.namefind.TokenNameFinderModel;
  54. import opennlp.tools.util.Span;
  55.  
  56. /**
  57. * Data: 04.12.2018
  58. * Simple application for recognizing named entities (Person, Location, Organisation) in given text.
  59. * @author Krzysiek
  60. * @version 1.0
  61. */
  62. public class NamedEntityRecog extends Application {
  63.  
  64. /**
  65. * Basic table view.
  66. */
  67. private TableView<Data> table = new TableView<Data>();
  68.  
  69. /**
  70. * Contents of the basic table's view.
  71. */
  72. private static ObservableList<Data> data = FXCollections.observableArrayList();
  73.  
  74. /**
  75. * Sets the view in horizontal mode.
  76. */
  77. final HBox hb = new HBox();
  78.  
  79. /**
  80. * Starting point of the program
  81. * @param args arguments for the program
  82. */
  83. public static void main(String[] args) {
  84. launch(args);
  85. }
  86.  
  87. /* (non-Javadoc)
  88. * @see javafx.application.Application#start(javafx.stage.Stage)
  89. */
  90. @Override
  91. public void start(Stage stage) {
  92. Scene scene = new Scene(new Group());
  93. stage.setTitle("ZTI");
  94. stage.setResizable(false);
  95. stage.setWidth(850);
  96. stage.setHeight(550);
  97.  
  98. final Label label = new Label("Entities");
  99. label.setFont(new Font("Arial", 20));
  100.  
  101. table.setEditable(true);
  102.  
  103. TableColumn namedEntityCol = new TableColumn("identified named entity");
  104. namedEntityCol.setMinWidth(100);
  105. namedEntityCol.setCellValueFactory(new PropertyValueFactory<Data, String>("namedEntity"));
  106.  
  107. TableColumn entityCol = new TableColumn("entity");
  108. entityCol.setMinWidth(100);
  109. entityCol.setCellValueFactory(new PropertyValueFactory<Data, String>("entity"));
  110.  
  111. TableColumn uriCol = new TableColumn("generated URI");
  112. uriCol.setMinWidth(400);
  113. uriCol.setCellValueFactory(new PropertyValueFactory<Data, String>("uri"));
  114.  
  115. TableColumn indicesCol = new TableColumn("indices");
  116. indicesCol.setMinWidth(100);
  117. indicesCol.setCellValueFactory(new PropertyValueFactory<Data, String>("indices"));
  118.  
  119. uriCol.setCellFactory(TextFieldTableCell.forTableColumn());
  120. uriCol.setOnEditCommit(new EventHandler<CellEditEvent<Data, String>>() {
  121. @Override
  122. public void handle(CellEditEvent<Data, String> t) {
  123. ((Data) t.getTableView().getItems().get(t.getTablePosition().getRow())).setUri(t.getNewValue());
  124. }
  125. });
  126.  
  127. table.setItems(data);
  128. table.getColumns().addAll(namedEntityCol, entityCol, uriCol, indicesCol);
  129.  
  130. TextField textField = new TextField();
  131. textField.setPromptText("Text");
  132.  
  133. Button addButton = new Button("Add");
  134. addButton.setOnAction(new EventHandler<ActionEvent>() {
  135. @Override
  136. public void handle(ActionEvent e) {
  137. data.clear();
  138. String text = textField.getText();
  139. try {
  140. words(text);
  141. } catch (IOException ee) {
  142. ee.printStackTrace();
  143. }
  144. }
  145. });
  146.  
  147. Button browseButton = new Button("Browse");
  148. browseButton.setOnAction(new EventHandler<ActionEvent>() {
  149. @Override
  150. public void handle(ActionEvent e) {
  151. data.clear();
  152. FileDialog fd = new FileDialog(new JFrame());
  153. fd.setVisible(true);
  154. String pathToFile = fd.getFiles()[0].getAbsolutePath();
  155. try {
  156. String content = new String(Files.readAllBytes(Paths.get(pathToFile)), "UTF-8");
  157. words(content);
  158. } catch (UnsupportedEncodingException e1) {
  159. System.out.println("Wrong coding of the file");
  160. e1.printStackTrace();
  161. } catch (IOException e1) {
  162. System.out.println("IO Exception");
  163. e1.printStackTrace();
  164. }
  165.  
  166. }
  167. });
  168.  
  169. hb.getChildren().addAll(textField, addButton, browseButton);
  170. hb.setSpacing(3);
  171.  
  172. final VBox vbox = new VBox();
  173. vbox.setSpacing(5);
  174. vbox.setPadding(new Insets(10, 0, 0, 10));
  175. vbox.getChildren().addAll(label, hb, table);
  176.  
  177. ((Group) scene.getRoot()).getChildren().addAll(vbox);
  178.  
  179. stage.setScene(scene);
  180. stage.show();
  181. }
  182.  
  183. public static class Data {
  184.  
  185. /**
  186. * Name of the entity.
  187. */
  188. private final SimpleStringProperty namedEntity;
  189. /**
  190. * Type of the entity (person/location/organization)
  191. */
  192. private final SimpleStringProperty entity;
  193. /**
  194. * Reference to a DBPedia resource for this entity.
  195. */
  196. private final SimpleStringProperty uri;
  197. /**
  198. * Indices indicating the position of the word in the original text.
  199. */
  200. private final SimpleStringProperty indices;
  201.  
  202. private Data(String fName, String lName, String email, String indices) {
  203. this.namedEntity = new SimpleStringProperty(fName);
  204. this.entity = new SimpleStringProperty(lName);
  205. this.uri = new SimpleStringProperty(email);
  206. this.indices = new SimpleStringProperty(indices);
  207. }
  208.  
  209. public String getNamedEntity() {
  210. return namedEntity.get();
  211. }
  212.  
  213. public void setNamedEntity(String fName) {
  214. namedEntity.set(fName);
  215. }
  216.  
  217. public String getEntity() {
  218. return entity.get();
  219. }
  220.  
  221. public void setEntity(String fName) {
  222. entity.set(fName);
  223. }
  224.  
  225. public String getUri() {
  226. return uri.get();
  227. }
  228.  
  229. public void setUri(String fName) {
  230. uri.set(fName);
  231. }
  232.  
  233. public String getIndices() {
  234. return indices.get();
  235. }
  236.  
  237. public void setIndices(String fName) {
  238. indices.set(fName);
  239. }
  240. }
  241.  
  242. /**
  243. * Method splitting the input text and managing the order of processing.
  244. * @param text The text to be processed.
  245. * @throws IOException
  246. */
  247. private void words(String text) throws IOException {
  248. String[] words = text.split("\\W+");
  249.  
  250. try {
  251. new NamedEntityRecog().findName(words);
  252. System.out.println();
  253. } catch (IOException e) {
  254. e.printStackTrace();
  255. }
  256.  
  257. try {
  258. new NamedEntityRecog().findLocation(words);
  259. System.out.println();
  260. } catch (IOException e) {
  261. e.printStackTrace();
  262. }
  263.  
  264. try {
  265. new NamedEntityRecog().findorganization(words);
  266. System.out.println();
  267. } catch (IOException e) {
  268. e.printStackTrace();
  269. }
  270.  
  271. for (int i = 0; i < data.size(); i++) {
  272. new NamedEntityRecog().findDBpedia(data.get(i).getNamedEntity(), data.get(i));
  273. }
  274.  
  275. generateRDF();
  276.  
  277. }
  278.  
  279. /**
  280. * Method adding a new entry in the UI.
  281. * @param span Indices representing the word's position in text.
  282. * @param sentence Original text split up into words.
  283. */
  284. private void AddData(Span span, String[] sentence) {
  285. String entityName = "";
  286. for (int index = span.getStart(); index < span.getEnd(); index++) {
  287. entityName += sentence[index];
  288. if(index != span.getEnd() - 1)
  289. {
  290. entityName += " ";
  291. }
  292. System.out.println(entityName);
  293. }
  294. String[] words = span.toString().split(" ");
  295. System.out.println(words);
  296. data.add(new Data(entityName, words[1], span.toString(), words[0]));
  297. System.out.println(data.size());
  298. }
  299.  
  300.  
  301. /**
  302. * Method finding name entities in text.
  303. * @param sentence Original text split up into words.
  304. * @throws IOException
  305. */
  306. public void findName(String[] sentence) throws IOException {
  307. InputStream is = new FileInputStream("en-ner-person.bin");
  308. TokenNameFinderModel model = new TokenNameFinderModel(is);
  309. is.close();
  310. NameFinderME nameFinder = new NameFinderME(model);
  311. Span nameSpans[] = nameFinder.find(sentence);
  312. for (Span s : nameSpans) {
  313. AddData(s, sentence);
  314. }
  315. }
  316.  
  317. /**
  318. * Method finding location entities in text.
  319. * @param sentence Original text split up into words.
  320. * @throws IOException
  321. */
  322. public void findLocation(String[] sentence) throws IOException {
  323. InputStream is = new FileInputStream("en-ner-location.bin");
  324. TokenNameFinderModel model = new TokenNameFinderModel(is);
  325. is.close();
  326. NameFinderME nameFinder = new NameFinderME(model);
  327. Span nameSpans[] = nameFinder.find(sentence);
  328. for (Span s : nameSpans) {
  329. AddData(s, sentence);
  330. }
  331. }
  332.  
  333. /**
  334. * Method finding organization entities in text.
  335. * @param sentence Original text split up into words.
  336. * @throws IOException
  337. */
  338. public void findorganization(String[] sentence) throws IOException {
  339. InputStream is = new FileInputStream("en-ner-organization.bin");
  340. TokenNameFinderModel model = new TokenNameFinderModel(is);
  341. is.close();
  342. NameFinderME nameFinder = new NameFinderME(model);
  343. Span nameSpans[] = nameFinder.find(sentence);
  344. for (Span s : nameSpans) {
  345. AddData(s, sentence);
  346. }
  347. }
  348.  
  349. /**
  350. * Method trying to find and save a DBPedia reference to a given word to the UI.
  351. * @param word Word to be looked up in DBPedia.
  352. * @param data Reference to the UI.
  353. * @throws IOException
  354. */
  355.  
  356.  
  357. public void findDBpedia(String word, Data data) throws IOException {
  358. System.out.println(word);
  359. ParameterizedSparqlString qs = new ParameterizedSparqlString(
  360. "" + "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
  361. + "prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
  362. + "prefix dbo: <http://dbpedia.org/ontology/>\n"
  363. + "prefix owl: <http://www.w3.org/2002/07/owl#>\n"
  364. + "prefix foaf: <http://xmlns.com/foaf/0.1/>\n"
  365. + "\n" + "select ?resource where {\n"
  366. + " ?resource rdfs:label ?label .\n"
  367. + " ?resource rdf:type dbo:"+data.getEntity().toString()
  368. .substring(0, 1).toUpperCase()+data.getEntity().toString().substring(1)+" .\n"
  369. + " FILTER (?label=\""+word+"\"@en) \n" + "}");
  370. //Literal example = ResourceFactory.createLangLiteral(word, "en");
  371. //qs.setParam("label", example);
  372. QueryExecution exec = QueryExecutionFactory.sparqlService("http://dbpedia.org/sparql", qs.asQuery());
  373. ResultSet results = ResultSetFactory.copyResults(exec.execSelect());
  374.  
  375. if (results.hasNext()) {
  376. String x = results.next().get("resource").toString();
  377. data.uri.set(x);
  378. }
  379. else
  380. {
  381. data.uri.set("No result");
  382. }
  383. }
  384.  
  385. public void generateRDF() throws IOException {
  386. Model model = ModelFactory.createDefaultModel();
  387.  
  388. for (int i = 0; i < data.size(); i++) {
  389. Resource a1 = model.createResource(data.get(i).getNamedEntity());
  390. Property p1 = model.createProperty("is a");
  391. Resource a2 = model.createProperty(data.get(i).getEntity());
  392. Property p2 = model.createProperty("exists as");
  393. Resource a3 = model.createResource(data.get(i).getUri());
  394. Statement stmt = model.createStatement(a1, p1, a2);
  395. Statement stmt2 = model.createStatement(a1, p2, a3);
  396.  
  397. if (!data.get(i).getUri().equals("No result")) { // add statement only if URI exists
  398. model.add(stmt);
  399. model.add(stmt2);
  400. }
  401.  
  402. }
  403.  
  404. model.write(System.out, "TTL"); // print RDF to console
  405. createRDFFile(model);
  406.  
  407. }
  408.  
  409. public void createRDFFile(Model model) throws IOException
  410. {
  411. File file = new File("C:/Users/Krzysiek/Desktop/pliczek2.rdf");
  412. OutputStream out = new FileOutputStream( file );
  413.  
  414.  
  415. RDFWriter writer = model.getWriter("RDF/XML-ABBREV");
  416. writer.setProperty("showXmlDeclaration","true");
  417. writer.setProperty("tab","8");
  418. writer.setProperty("relativeURIs","same-document,relative");
  419. writer.setProperty("allowBadURIs", "true");
  420.  
  421. try {
  422. writer.write(model, out, "");
  423. out.close();
  424.  
  425. }
  426. finally {
  427. try {
  428. out.close();
  429. }
  430. catch (IOException closeException) {
  431. // ignore
  432. }
  433. }
  434. }
  435.  
  436. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement