Guest User

Untitled

a guest
Feb 25th, 2018
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.61 KB | None | 0 0
  1. /*
  2. * To change this license header, choose License Headers in Project Properties.
  3. * To change this template file, choose Tools | Templates
  4. * and open the template in the editor.
  5. */
  6. package rex1nlp;
  7.  
  8. import java.io.FileInputStream;
  9. import java.io.IOException;
  10. import java.io.InputStream;
  11. import opennlp.tools.namefind.NameFinderME;
  12. import opennlp.tools.namefind.TokenNameFinderModel;
  13. import opennlp.tools.tokenize.Tokenizer;
  14. import opennlp.tools.tokenize.TokenizerME;
  15. import opennlp.tools.tokenize.TokenizerModel;
  16. import opennlp.tools.util.Span;
  17. import org.apache.commons.compress.archivers.dump.InvalidFormatException;
  18. import org.apache.commons.vfs2.FileNotFoundException;
  19. import org.apache.tika.exception.TikaException;
  20. import org.xml.sax.SAXException;
  21.  
  22. /**
  23. *
  24. * @author RexPC
  25. */
  26. public class tikaNLPRex {
  27.  
  28. String Tokens[];
  29.  
  30. public static void main(String[] args) throws IOException, SAXException,
  31. TikaException {
  32.  
  33. tikaNLPRex toi = new tikaNLPRex();
  34.  
  35.  
  36. String cnt;
  37.  
  38. cnt="John is planning to specialize in Electrical Engineering in UC Berkley and pursue a career with IBM.";
  39.  
  40. toi.tokenization(cnt);
  41.  
  42. String names = toi.namefind(toi.Tokens);
  43. String org = toi.orgfind(toi.Tokens);
  44.  
  45. System.out.println("person name is : "+names);
  46. System.out.println("organization name is: "+org);
  47.  
  48. }
  49. public String namefind(String cnt[]) {
  50. InputStream is;
  51. TokenNameFinderModel tnf;
  52. NameFinderME nf;
  53. String sd = "";
  54. try {
  55. is = new FileInputStream(
  56. "C:\\Users\\RexPC\\Documents\\Programming\\Apache OpenNLP\\Models\\Original OpenNLP Models\\en-ner-person.bin");
  57. tnf = new TokenNameFinderModel(is);
  58. nf = new NameFinderME(tnf);
  59.  
  60. Span sp[] = nf.find(cnt);
  61.  
  62. String a[] = Span.spansToStrings(sp, cnt);
  63. StringBuilder fd = new StringBuilder();
  64. int l = a.length;
  65.  
  66. for (int j = 0; j < l; j++) {
  67. fd = fd.append(a[j]).append("\n");
  68.  
  69. }
  70. sd = fd.toString();
  71.  
  72. } catch (FileNotFoundException e) {
  73. } catch (InvalidFormatException e) {
  74. } catch (IOException e) {
  75. }
  76. return sd;
  77. }
  78.  
  79. public String orgfind(String cnt[]) {
  80. InputStream is;
  81. TokenNameFinderModel tnf;
  82. NameFinderME nf;
  83. String sd = "";
  84. try {
  85. is = new FileInputStream(
  86. "C:\\Users\\RexPC\\Documents\\Programming\\Apache OpenNLP\\Models\\Original OpenNLP Models\\en-ner-organization.bin");
  87. tnf = new TokenNameFinderModel(is);
  88. nf = new NameFinderME(tnf);
  89. Span sp[] = nf.find(cnt);
  90. String a[] = Span.spansToStrings(sp, cnt);
  91. StringBuilder fd = new StringBuilder();
  92. int l = a.length;
  93.  
  94. for (int j = 0; j < l; j++) {
  95. fd = fd.append(a[j]).append("\n");
  96.  
  97. }
  98.  
  99. sd = fd.toString();
  100.  
  101. } catch (FileNotFoundException e) {
  102. } catch (InvalidFormatException e) {
  103. } catch (IOException e) {
  104. }
  105. return sd;
  106.  
  107. }
  108.  
  109.  
  110. public void tokenization(String tokens) {
  111.  
  112. InputStream is;
  113. TokenizerModel tm;
  114.  
  115. try {
  116. is = new FileInputStream("C:\\Users\\RexPC\\Documents\\Programming\\Apache OpenNLP\\Models\\Original OpenNLP Models\\en-token.bin");
  117. tm = new TokenizerModel(is);
  118. Tokenizer tz = new TokenizerME(tm);
  119. Tokens = tz.tokenize(tokens);
  120. // System.out.println(Tokens[1]);
  121. } catch (IOException e) {
  122. }
  123. }
  124.  
  125. }
Add Comment
Please, Sign In to add comment