Guest User

Untitled

a guest
Jun 24th, 2018
205
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.64 KB | None | 0 0
  1. Index: modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
  2. ===================================================================
  3. --- modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (revision 1244546)
  4. +++ modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (working copy)
  5. @@ -44,19 +44,17 @@
  6. * analyzes the tokenizer input using the given analysis engine
  7. *
  8. * @param analysisEngine the AE to use for analyzing the tokenizer input
  9. - * @return CAS with extracted metadata (UIMA annotations, feature structures)
  10. + * @param cas the CAS to fill with extracted metadata (UIMA annotations, feature structures)
  11. * @throws ResourceInitializationException
  12. *
  13. * @throws AnalysisEngineProcessException
  14. * @throws IOException
  15. */
  16. - protected CAS analyzeInput(AnalysisEngine analysisEngine) throws ResourceInitializationException,
  17. + protected void analyzeInput(AnalysisEngine analysisEngine, CAS cas) throws ResourceInitializationException,
  18. AnalysisEngineProcessException, IOException {
  19. - CAS cas = analysisEngine.newCAS();
  20. + cas.reset();
  21. cas.setDocumentText(toString(input));
  22. analysisEngine.process(cas);
  23. - analysisEngine.destroy();
  24. - return cas;
  25. }
  26.  
  27. private String toString(Reader reader) throws IOException {
  28. Index: modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
  29. ===================================================================
  30. --- modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (revision 1244546)
  31. +++ modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (working copy)
  32. @@ -44,6 +44,9 @@
  33. private final String tokenTypeString;
  34.  
  35. private final String descriptorPath;
  36. +
  37. + private final AnalysisEngine ae; // nocommit: make superclasses of BaseUIMATokenizer set this?
  38. + private final CAS cas; // nocommit: make superclasses of BaseUIMATokenizer set this?
  39.  
  40. private int finalOffset = 0;
  41.  
  42. @@ -53,12 +56,17 @@
  43. this.termAttr = addAttribute(CharTermAttribute.class);
  44. this.offsetAttr = addAttribute(OffsetAttribute.class);
  45. this.descriptorPath = descriptorPath;
  46. + try {
  47. + ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
  48. + cas = ae.newCAS();
  49. + } catch (ResourceInitializationException e) {
  50. + throw new RuntimeException(e);
  51. + }
  52. }
  53.  
  54. private void analyzeText(String descriptorPath) throws IOException, ResourceInitializationException,
  55. AnalysisEngineProcessException {
  56. - AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
  57. - CAS cas = analyzeInput(ae);
  58. + analyzeInput(ae, cas);
  59. finalOffset = correctOffset(cas.getDocumentText().length());
  60. Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
  61. iterator = cas.getAnnotationIndex(tokenType).iterator();
  62. Index: modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
  63. ===================================================================
  64. --- modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (revision 1244546)
  65. +++ modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (working copy)
  66. @@ -54,6 +54,9 @@
  67. private final String typeAttributeFeaturePath;
  68.  
  69. private FeaturePath featurePath;
  70. +
  71. + private final AnalysisEngine ae; // nocommit: make superclasses of BaseUIMATokenizer set this?
  72. + private final CAS cas; // nocommit: make superclasses of BaseUIMATokenizer set this?
  73.  
  74. private int finalOffset = 0;
  75.  
  76. @@ -65,12 +68,17 @@
  77. this.offsetAttr = addAttribute(OffsetAttribute.class);
  78. this.typeAttributeFeaturePath = typeAttributeFeaturePath;
  79. this.descriptorPath = descriptorPath;
  80. + try {
  81. + ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
  82. + cas = ae.newCAS();
  83. + } catch (ResourceInitializationException e) {
  84. + throw new RuntimeException(e);
  85. + }
  86. }
  87.  
  88. private void analyzeText() throws IOException, ResourceInitializationException, AnalysisEngineProcessException,
  89. CASException {
  90. - AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
  91. - CAS cas = analyzeInput(ae);
  92. + analyzeInput(ae, cas);
  93. finalOffset = correctOffset(cas.getDocumentText().length());
  94. Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
  95. iterator = cas.getAnnotationIndex(tokenType).iterator();
Add Comment
Please, Sign In to add comment