Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Index: modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
- ===================================================================
- --- modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (revision 1244546)
- +++ modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (working copy)
- @@ -44,19 +44,17 @@
- * analyzes the tokenizer input using the given analysis engine
- *
- * @param analysisEngine the AE to use for analyzing the tokenizer input
- - * @return CAS with extracted metadata (UIMA annotations, feature structures)
- + * @param cas the CAS to fill with extracted metadata (UIMA annotations, feature structures)
- * @throws ResourceInitializationException
- *
- * @throws AnalysisEngineProcessException
- * @throws IOException
- */
- - protected CAS analyzeInput(AnalysisEngine analysisEngine) throws ResourceInitializationException,
- + protected void analyzeInput(AnalysisEngine analysisEngine, CAS cas) throws ResourceInitializationException,
- AnalysisEngineProcessException, IOException {
- - CAS cas = analysisEngine.newCAS();
- + cas.reset();
- cas.setDocumentText(toString(input));
- analysisEngine.process(cas);
- - analysisEngine.destroy();
- - return cas;
- }
- private String toString(Reader reader) throws IOException {
- Index: modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
- ===================================================================
- --- modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (revision 1244546)
- +++ modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (working copy)
- @@ -44,6 +44,9 @@
- private final String tokenTypeString;
- private final String descriptorPath;
- +
- + private final AnalysisEngine ae; // nocommit: make subclasses of BaseUIMATokenizer set this?
- + private final CAS cas; // nocommit: make subclasses of BaseUIMATokenizer set this?
- private int finalOffset = 0;
- @@ -53,12 +56,17 @@
- this.termAttr = addAttribute(CharTermAttribute.class);
- this.offsetAttr = addAttribute(OffsetAttribute.class);
- this.descriptorPath = descriptorPath;
- + try {
- + ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
- + cas = ae.newCAS();
- + } catch (ResourceInitializationException e) {
- + throw new RuntimeException(e);
- + }
- }
- private void analyzeText(String descriptorPath) throws IOException, ResourceInitializationException,
- AnalysisEngineProcessException {
- - AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
- - CAS cas = analyzeInput(ae);
- + analyzeInput(ae, cas);
- finalOffset = correctOffset(cas.getDocumentText().length());
- Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
- iterator = cas.getAnnotationIndex(tokenType).iterator();
- Index: modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
- ===================================================================
- --- modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (revision 1244546)
- +++ modules/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (working copy)
- @@ -54,6 +54,9 @@
- private final String typeAttributeFeaturePath;
- private FeaturePath featurePath;
- +
- + private final AnalysisEngine ae; // nocommit: make subclasses of BaseUIMATokenizer set this?
- + private final CAS cas; // nocommit: make subclasses of BaseUIMATokenizer set this?
- private int finalOffset = 0;
- @@ -65,12 +68,17 @@
- this.offsetAttr = addAttribute(OffsetAttribute.class);
- this.typeAttributeFeaturePath = typeAttributeFeaturePath;
- this.descriptorPath = descriptorPath;
- + try {
- + ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
- + cas = ae.newCAS();
- + } catch (ResourceInitializationException e) {
- + throw new RuntimeException(e);
- + }
- }
- private void analyzeText() throws IOException, ResourceInitializationException, AnalysisEngineProcessException,
- CASException {
- - AnalysisEngine ae = AEProviderFactory.getInstance().getAEProvider("", descriptorPath).getAE();
- - CAS cas = analyzeInput(ae);
- + analyzeInput(ae, cas);
- finalOffset = correctOffset(cas.getDocumentText().length());
- Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
- iterator = cas.getAnnotationIndex(tokenType).iterator();
Add Comment
Please, Sign In to add comment