Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package ner;
import java.awt.FileDialog;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Pattern;
import javafx.application.Application;
import javafx.beans.property.SimpleStringProperty;
import javafx.beans.property.StringProperty;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import javafx.event.ActionEvent;
import javafx.event.EventHandler;
import javafx.geometry.Insets;
import javafx.scene.Group;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.TableColumn;
import javafx.scene.control.TableColumn.CellEditEvent;
import javafx.scene.control.TableView;
import javafx.scene.control.TextField;
import javafx.scene.control.cell.PropertyValueFactory;
import javafx.scene.control.cell.TextFieldTableCell;
import javafx.scene.layout.HBox;
import javafx.scene.layout.VBox;
import javafx.scene.text.Font;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
import javax.swing.JFrame;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.Span;
import org.apache.jena.query.ParameterizedSparqlString;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.ResultSet;
import org.apache.jena.query.ResultSetFactory;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFWriter;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.Statement;
- /**
- * Data: 04.12.2018
- * Simple application for recognizing named entities (Person, Location, Organisation) in given text.
- * @author Krzysiek
- * @version 1.0
- */
- public class NamedEntityRecog extends Application {
- /**
- * Basic table view.
- */
- private TableView<Data> table = new TableView<Data>();
- /**
- * Contents of the basic table's view.
- */
- private static ObservableList<Data> data = FXCollections.observableArrayList();
- /**
- * Sets the view in horizontal mode.
- */
- final HBox hb = new HBox();
- /**
- * Starting point of the program
- * @param args arguments for the program
- */
- public static void main(String[] args) {
- launch(args);
- }
- /* (non-Javadoc)
- * @see javafx.application.Application#start(javafx.stage.Stage)
- */
- @Override
- public void start(Stage stage) {
- Scene scene = new Scene(new Group());
- stage.setTitle("ZTI");
- stage.setResizable(false);
- stage.setWidth(850);
- stage.setHeight(550);
- final Label label = new Label("Entities");
- label.setFont(new Font("Arial", 20));
- table.setEditable(true);
- TableColumn namedEntityCol = new TableColumn("identified named entity");
- namedEntityCol.setMinWidth(100);
- namedEntityCol.setCellValueFactory(new PropertyValueFactory<Data, String>("namedEntity"));
- TableColumn entityCol = new TableColumn("entity");
- entityCol.setMinWidth(100);
- entityCol.setCellValueFactory(new PropertyValueFactory<Data, String>("entity"));
- TableColumn uriCol = new TableColumn("generated URI");
- uriCol.setMinWidth(400);
- uriCol.setCellValueFactory(new PropertyValueFactory<Data, String>("uri"));
- TableColumn indicesCol = new TableColumn("indices");
- indicesCol.setMinWidth(100);
- indicesCol.setCellValueFactory(new PropertyValueFactory<Data, String>("indices"));
- uriCol.setCellFactory(TextFieldTableCell.forTableColumn());
- uriCol.setOnEditCommit(new EventHandler<CellEditEvent<Data, String>>() {
- @Override
- public void handle(CellEditEvent<Data, String> t) {
- ((Data) t.getTableView().getItems().get(t.getTablePosition().getRow())).setUri(t.getNewValue());
- }
- });
- table.setItems(data);
- table.getColumns().addAll(namedEntityCol, entityCol, uriCol, indicesCol);
- TextField textField = new TextField();
- textField.setPromptText("Text");
- Button addButton = new Button("Add");
- addButton.setOnAction(new EventHandler<ActionEvent>() {
- @Override
- public void handle(ActionEvent e) {
- data.clear();
- String text = textField.getText();
- try {
- words(text);
- } catch (IOException ee) {
- ee.printStackTrace();
- }
- }
- });
- Button browseButton = new Button("Browse");
- browseButton.setOnAction(new EventHandler<ActionEvent>() {
- @Override
- public void handle(ActionEvent e) {
- data.clear();
- FileDialog fd = new FileDialog(new JFrame());
- fd.setVisible(true);
- String pathToFile = fd.getFiles()[0].getAbsolutePath();
- try {
- String content = new String(Files.readAllBytes(Paths.get(pathToFile)), "UTF-8");
- words(content);
- } catch (UnsupportedEncodingException e1) {
- System.out.println("Wrong coding of the file");
- e1.printStackTrace();
- } catch (IOException e1) {
- System.out.println("IO Exception");
- e1.printStackTrace();
- }
- }
- });
- hb.getChildren().addAll(textField, addButton, browseButton);
- hb.setSpacing(3);
- final VBox vbox = new VBox();
- vbox.setSpacing(5);
- vbox.setPadding(new Insets(10, 0, 0, 10));
- vbox.getChildren().addAll(label, hb, table);
- ((Group) scene.getRoot()).getChildren().addAll(vbox);
- stage.setScene(scene);
- stage.show();
- }
- public static class Data {
- /**
- * Name of the entity.
- */
- private final SimpleStringProperty namedEntity;
- /**
- * Type of the entity (person/location/organization)
- */
- private final SimpleStringProperty entity;
- /**
- * Reference to a DBPedia resource for this entity.
- */
- private final SimpleStringProperty uri;
- /**
- * Indices indicating the position of the word in the original text.
- */
- private final SimpleStringProperty indices;
- private Data(String fName, String lName, String email, String indices) {
- this.namedEntity = new SimpleStringProperty(fName);
- this.entity = new SimpleStringProperty(lName);
- this.uri = new SimpleStringProperty(email);
- this.indices = new SimpleStringProperty(indices);
- }
- public String getNamedEntity() {
- return namedEntity.get();
- }
- public void setNamedEntity(String fName) {
- namedEntity.set(fName);
- }
- public String getEntity() {
- return entity.get();
- }
- public void setEntity(String fName) {
- entity.set(fName);
- }
- public String getUri() {
- return uri.get();
- }
- public void setUri(String fName) {
- uri.set(fName);
- }
- public String getIndices() {
- return indices.get();
- }
- public void setIndices(String fName) {
- indices.set(fName);
- }
- }
- /**
- * Method splitting the input text and managing the order of processing.
- * @param text The text to be processed.
- * @throws IOException
- */
- private void words(String text) throws IOException {
- String[] words = text.split("\\W+");
- try {
- new NamedEntityRecog().findName(words);
- System.out.println();
- } catch (IOException e) {
- e.printStackTrace();
- }
- try {
- new NamedEntityRecog().findLocation(words);
- System.out.println();
- } catch (IOException e) {
- e.printStackTrace();
- }
- try {
- new NamedEntityRecog().findorganization(words);
- System.out.println();
- } catch (IOException e) {
- e.printStackTrace();
- }
- for (int i = 0; i < data.size(); i++) {
- new NamedEntityRecog().findDBpedia(data.get(i).getNamedEntity(), data.get(i));
- }
- generateRDF();
- }
- /**
- * Method adding a new entry in the UI.
- * @param span Indices representing the word's position in text.
- * @param sentence Original text split up into words.
- */
- private void AddData(Span span, String[] sentence) {
- String entityName = "";
- for (int index = span.getStart(); index < span.getEnd(); index++) {
- entityName += sentence[index];
- if(index != span.getEnd() - 1)
- {
- entityName += " ";
- }
- System.out.println(entityName);
- }
- String[] words = span.toString().split(" ");
- System.out.println(words);
- data.add(new Data(entityName, words[1], span.toString(), words[0]));
- System.out.println(data.size());
- }
- /**
- * Method finding name entities in text.
- * @param sentence Original text split up into words.
- * @throws IOException
- */
- public void findName(String[] sentence) throws IOException {
- InputStream is = new FileInputStream("en-ner-person.bin");
- TokenNameFinderModel model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span nameSpans[] = nameFinder.find(sentence);
- for (Span s : nameSpans) {
- AddData(s, sentence);
- }
- }
- /**
- * Method finding location entities in text.
- * @param sentence Original text split up into words.
- * @throws IOException
- */
- public void findLocation(String[] sentence) throws IOException {
- InputStream is = new FileInputStream("en-ner-location.bin");
- TokenNameFinderModel model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span nameSpans[] = nameFinder.find(sentence);
- for (Span s : nameSpans) {
- AddData(s, sentence);
- }
- }
- /**
- * Method finding organization entities in text.
- * @param sentence Original text split up into words.
- * @throws IOException
- */
- public void findorganization(String[] sentence) throws IOException {
- InputStream is = new FileInputStream("en-ner-organization.bin");
- TokenNameFinderModel model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span nameSpans[] = nameFinder.find(sentence);
- for (Span s : nameSpans) {
- AddData(s, sentence);
- }
- }
- /**
- * Method trying to find and save a DBPedia reference to a given word to the UI.
- * @param word Word to be looked up in DBPedia.
- * @param data Reference to the UI.
- * @throws IOException
- */
- public void findDBpedia(String word, Data data) throws IOException {
- System.out.println(word);
- ParameterizedSparqlString qs = new ParameterizedSparqlString(
- "" + "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
- + "prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
- + "prefix dbo: <http://dbpedia.org/ontology/>\n"
- + "prefix owl: <http://www.w3.org/2002/07/owl#>\n"
- + "prefix foaf: <http://xmlns.com/foaf/0.1/>\n"
- + "\n" + "select ?resource where {\n"
- + " ?resource rdfs:label ?label .\n"
- + " ?resource rdf:type dbo:"+data.getEntity().toString()
- .substring(0, 1).toUpperCase()+data.getEntity().toString().substring(1)+" .\n"
- + " FILTER (?label=\""+word+"\"@en) \n" + "}");
- //Literal example = ResourceFactory.createLangLiteral(word, "en");
- //qs.setParam("label", example);
- QueryExecution exec = QueryExecutionFactory.sparqlService("http://dbpedia.org/sparql", qs.asQuery());
- ResultSet results = ResultSetFactory.copyResults(exec.execSelect());
- if (results.hasNext()) {
- String x = results.next().get("resource").toString();
- data.uri.set(x);
- }
- else
- {
- data.uri.set("No result");
- }
- }
- public void generateRDF() throws IOException {
- Model model = ModelFactory.createDefaultModel();
- for (int i = 0; i < data.size(); i++) {
- Resource a1 = model.createResource(data.get(i).getNamedEntity());
- Property p1 = model.createProperty("is a");
- Resource a2 = model.createProperty(data.get(i).getEntity());
- Property p2 = model.createProperty("exists as");
- Resource a3 = model.createResource(data.get(i).getUri());
- Statement stmt = model.createStatement(a1, p1, a2);
- Statement stmt2 = model.createStatement(a1, p2, a3);
- if (!data.get(i).getUri().equals("No result")) { // add statement only if URI exists
- model.add(stmt);
- model.add(stmt2);
- }
- }
- model.write(System.out, "TTL"); // print RDF to console
- createRDFFile(model);
- }
- public void createRDFFile(Model model) throws IOException
- {
- File file = new File("C:/Users/Krzysiek/Desktop/pliczek2.rdf");
- OutputStream out = new FileOutputStream( file );
- RDFWriter writer = model.getWriter("RDF/XML-ABBREV");
- writer.setProperty("showXmlDeclaration","true");
- writer.setProperty("tab","8");
- writer.setProperty("relativeURIs","same-document,relative");
- writer.setProperty("allowBadURIs", "true");
- try {
- writer.write(model, out, "");
- out.close();
- }
- finally {
- try {
- out.close();
- }
- catch (IOException closeException) {
- // ignore
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement