Advertisement
Guest User

Untitled

a guest
Mar 10th, 2015
241
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
XML 3.30 KB | None | 0 0
  <dataConfig>
    <!--
      Data import configuration: two directory trees (".../A" and ".../B") are
      scanned with FileListEntityProcessor, and every matching file is passed to
      a nested TikaEntityProcessor entity that maps document metadata to fields.

      Entity names are kept unique (FILE_A / FILE_B, tikafile_A / tikafile_B) so
      that each ${...fileAbsolutePath} reference resolves against exactly one
      parent entity and each entity can be addressed individually.
    -->

    <!-- Named data source referenced by the Tika entities via dataSource="bin". -->
    <dataSource name="bin" type="BinFileDataSource" />

    <document>

      <!-- File listing for directory "A" (recursive). -->
      <entity name="FILE_A"
              dataSource="null"
              processor="FileListEntityProcessor"
              rootEntity="false"
              fileName="^.*\.((DOC)|(doc)|(DOCX)|(docx)|(html)|(HTML)|(XML)|(xml)|(PDF)|(pdf)|(PPT)|(ppt)|(PPTX)|(pptx)|(XLS)|(xls)|(XLSX)|(xlsx)|(TXT)|(txt)|(ODP)|(odp)|(RTF)|(rtf)|(MSG)|(msg))$"
              recursive="true"
              baseDir="C:/Users/Public/ficheiros testes/A"
              pk="id"
              logLevel="debug">
        <!-- NOTE(review): logLevel is normally consumed by a log transformer;
             no transformer attribute is set on this entity - confirm it has
             any effect here. -->

        <!-- The absolute path of the file doubles as the document id. -->
        <field column="fileAbsolutePath" name="id" default="missing" />
        <field column="fileLastModified" name="last_modified"/>
        <!-- NOTE(review): "extension" does not appear to be one of the columns
             produced by FileListEntityProcessor and no transformer is
             configured to derive it - confirm where this value comes from. -->
        <field column="extension" name="extension_ss"/>
        <field column="file" name="filename"/>
        <!-- <field column="fileDirSplitted" name="tags2"/> -->

        <!-- Tika extraction for each file listed by FILE_A; errors on a single
             file do not abort the import (onError="continue"). -->
        <entity name="tikafile_A"
                processor="TikaEntityProcessor"
                format="text"
                onError="continue"
                dataSource="bin"
                url="${FILE_A.fileAbsolutePath}">
          <!-- Do the appropriate mapping here; meta="true" marks a metadata field. -->
          <field column="title" meta="true" name="title"/>
          <field column="subject" meta="true" name="subject"/>
          <field column="description" meta="true" name="description"/>
          <field column="Comments" meta="true" name="comments"/>
          <field column="Author" meta="true" name="author"/>
          <field column="Keywords" meta="true" name="keywords"/>
          <field column="Category" meta="true" name="category"/>
          <field column="format" meta="true" name="content_type"/>
          <!-- 'text' is an implicit field emitted by TikaEntityProcessor. Map it
               appropriately; the mapping below is currently disabled. -->
          <!-- <field column="new_text" name="body"/> -->
        </entity>
      </entity>

      <!-- File listing for directory "B" (recursive); same rules as FILE_A. -->
      <entity name="FILE_B"
              dataSource="null"
              processor="FileListEntityProcessor"
              rootEntity="false"
              fileName="^.*\.((DOC)|(doc)|(DOCX)|(docx)|(html)|(HTML)|(XML)|(xml)|(PDF)|(pdf)|(PPT)|(ppt)|(PPTX)|(pptx)|(XLS)|(xls)|(XLSX)|(xlsx)|(TXT)|(txt)|(ODP)|(odp)|(RTF)|(rtf)|(MSG)|(msg))$"
              recursive="true"
              baseDir="C:/Users/Public/ficheiros testes/B"
              pk="id"
              logLevel="debug">
        <!-- NOTE(review): logLevel is normally consumed by a log transformer;
             no transformer attribute is set on this entity - confirm it has
             any effect here. -->

        <!-- The absolute path of the file doubles as the document id. -->
        <field column="fileAbsolutePath" name="id" default="missing" />
        <field column="fileLastModified" name="last_modified"/>
        <!-- NOTE(review): "extension" does not appear to be one of the columns
             produced by FileListEntityProcessor and no transformer is
             configured to derive it - confirm where this value comes from. -->
        <field column="extension" name="extension_ss"/>
        <field column="file" name="filename"/>
        <!-- <field column="fileDirSplitted" name="tags2"/> -->

        <!-- Tika extraction for each file listed by FILE_B; errors on a single
             file do not abort the import (onError="continue"). -->
        <entity name="tikafile_B"
                processor="TikaEntityProcessor"
                format="text"
                onError="continue"
                dataSource="bin"
                url="${FILE_B.fileAbsolutePath}">
          <!-- Do the appropriate mapping here; meta="true" marks a metadata field. -->
          <field column="title" meta="true" name="title"/>
          <field column="subject" meta="true" name="subject"/>
          <field column="description" meta="true" name="description"/>
          <field column="Comments" meta="true" name="comments"/>
          <field column="Author" meta="true" name="author"/>
          <field column="Keywords" meta="true" name="keywords"/>
          <field column="Category" meta="true" name="category"/>
          <field column="format" meta="true" name="content_type"/>
          <!-- 'text' is an implicit field emitted by TikaEntityProcessor. Map it
               appropriately; the mapping below is currently disabled. -->
          <!-- <field column="new_text" name="body"/> -->
        </entity>
      </entity>

    </document>
  </dataConfig>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement