Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<dataConfig>
  <!--
    Solr DataImportHandler configuration.
    Two file-listing entities scan directories A and B (recursively) for files
    whose names match the fileName pattern. Each listed file is handed to a
    nested TikaEntityProcessor entity through ${FILE.fileAbsolutePath}.
  -->

  <!-- Binary file source referenced by the nested Tika entities (dataSource="bin"). -->
  <dataSource name="bin" type="BinFileDataSource"/>

  <document>
    <!--
      NOTE(review): both top-level entities are named "FILE" and both nested
      entities are named "tikafile". The DIH reference describes an entity name
      as a unique identifier. The names are left unchanged here so that existing
      entity=FILE import requests and the ${FILE.fileAbsolutePath} references
      keep working; confirm the intended behavior before renaming either one.
    -->

    <!-- Directory A. The file listing itself needs no data source (dataSource="null"). -->
    <entity name="FILE"
            dataSource="null"
            processor="FileListEntityProcessor"
            rootEntity="false"
            fileName="^.*\.((DOC)|(doc)|(DOCX)|(docx)|(html)|(HTML)|(XML)|(xml)|(PDF)|(pdf)|(PPT)|(ppt)|(PPTX)|(pptx)|(XLS)|(xls)|(XLSX)|(xlsx)|(TXT)|(txt)|(ODP)|(odp)|(RTF)|(rtf)|(MSG)|(msg))$"
            recursive="true"
            baseDir="C:/Users/Public/ficheiros testes/A"
            pk="id"
            logLevel="debug">
      <!-- The absolute path is used as the document id; "missing" is the declared default. -->
      <field column="fileAbsolutePath" name="id" default="missing"/>
      <field column="fileLastModified" name="last_modified"/>
      <!--
        NOTE(review): "extension" does not appear to be one of the implicit
        columns of FileListEntityProcessor (fileDir, file, fileAbsolutePath,
        fileSize, fileLastModified). Verify that something populates it,
        otherwise extension_ss will presumably stay empty.
      -->
      <field column="extension" name="extension_ss"/>
      <field column="file" name="filename"/>
      <!-- <field column="fileDirSplitted" name="tags2"/> -->

      <!-- Tika extraction for the file currently listed by the enclosing entity. -->
      <entity name="tikafile"
              processor="TikaEntityProcessor"
              format="text"
              onError="continue"
              dataSource="bin"
              url="${FILE.fileAbsolutePath}">
        <!-- Do the appropriate mapping here; meta="true" marks a metadata field. -->
        <field column="title" meta="true" name="title"/>
        <field column="subject" meta="true" name="subject"/>
        <field column="description" meta="true" name="description"/>
        <field column="Comments" meta="true" name="comments"/>
        <field column="Author" meta="true" name="author"/>
        <field column="Keywords" meta="true" name="keywords"/>
        <field column="Category" meta="true" name="category"/>
        <field column="format" meta="true" name="content_type"/>
        <!-- 'text' is an implicit field emitted by TikaEntityProcessor. Map it appropriately. -->
        <!-- <field column="new_text" name="body"/> -->
      </entity>
    </entity>

    <!-- Directory B. Same settings and mappings as directory A; only baseDir differs. -->
    <entity name="FILE"
            dataSource="null"
            processor="FileListEntityProcessor"
            rootEntity="false"
            fileName="^.*\.((DOC)|(doc)|(DOCX)|(docx)|(html)|(HTML)|(XML)|(xml)|(PDF)|(pdf)|(PPT)|(ppt)|(PPTX)|(pptx)|(XLS)|(xls)|(XLSX)|(xlsx)|(TXT)|(txt)|(ODP)|(odp)|(RTF)|(rtf)|(MSG)|(msg))$"
            recursive="true"
            baseDir="C:/Users/Public/ficheiros testes/B"
            pk="id"
            logLevel="debug">
      <!-- The absolute path is used as the document id; "missing" is the declared default. -->
      <field column="fileAbsolutePath" name="id" default="missing"/>
      <field column="fileLastModified" name="last_modified"/>
      <!--
        NOTE(review): "extension" does not appear to be one of the implicit
        columns of FileListEntityProcessor (fileDir, file, fileAbsolutePath,
        fileSize, fileLastModified). Verify that something populates it,
        otherwise extension_ss will presumably stay empty.
      -->
      <field column="extension" name="extension_ss"/>
      <field column="file" name="filename"/>
      <!-- <field column="fileDirSplitted" name="tags2"/> -->

      <!-- Tika extraction for the file currently listed by the enclosing entity. -->
      <entity name="tikafile"
              processor="TikaEntityProcessor"
              format="text"
              onError="continue"
              dataSource="bin"
              url="${FILE.fileAbsolutePath}">
        <!-- Do the appropriate mapping here; meta="true" marks a metadata field. -->
        <field column="title" meta="true" name="title"/>
        <field column="subject" meta="true" name="subject"/>
        <field column="description" meta="true" name="description"/>
        <field column="Comments" meta="true" name="comments"/>
        <field column="Author" meta="true" name="author"/>
        <field column="Keywords" meta="true" name="keywords"/>
        <field column="Category" meta="true" name="category"/>
        <field column="format" meta="true" name="content_type"/>
        <!-- 'text' is an implicit field emitted by TikaEntityProcessor. Map it appropriately. -->
        <!-- <field column="new_text" name="body"/> -->
      </entity>
    </entity>
  </document>
</dataConfig>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement