Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?xml version="1.0" encoding="utf-8"?>
<!--
  This file is dataimport-wordpress.xml: a Solr DataImportHandler
  configuration that indexes published posts from every Wordpress blog whose
  tables live in one MySQL database.

  Put this below 'config' in solr/conf/solrconfig.xml:

  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <str name="config">/tmp/dataimport-wordpress.xml</str>
    </lst>
  </requestHandler>
-->
<dataConfig>
  <!-- Read-only JDBC connection to the local 'wp' database.
       NOTE(review): batchSize="-1" is the setting the DataImportHandler FAQ
       recommends for MySQL so that rows are streamed instead of buffered;
       confirm against the Solr version in use. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/wp"
              user="root"
              password=""
              readOnly="true"
              batchSize="-1" />
  <document name="wordpress">
    <!-- Iterate over each Wordpress tablename prefix found in the information schema.
         Each row exposes the prefix (table name with '_posts' removed) as
         ${blogtables.blog}; tables starting with 'wp_nwt_' are skipped.
         rootEntity="false" means rows of this entity do not become documents.
         NOTE(review): 'recursive' is documented for FileListEntityProcessor;
         presumably it has no effect on a SQL entity. Kept as-is; confirm. -->
    <entity name="blogtables"
            recursive="true"
            query="SELECT REPLACE(TABLE_NAME, '_posts', '') AS blog FROM information_schema.TABLES WHERE TABLE_SCHEMA=DATABASE() AND TABLE_NAME NOT LIKE 'wp_nwt_%' AND TABLE_NAME LIKE 'wp_%_posts'"
            rootEntity="false">
      <!-- This is our root entity and each row returned will index one document.
           Only published, non-password-protected rows of post_type 'post' are selected.
           The 'deleted' flag is computed in this same query rather than in a
           separate sub-entity, which avoids one extra query per post against
           the very same table. -->
      <entity name="blog"
              rootEntity="true"
              pk="ID"
              query="SELECT *, IF(post_status='publish', 0, 1) AS deleted FROM ${blogtables.blog}_posts WHERE post_type='post' AND post_status='publish' AND post_password=''"
              transformer="HTMLStripTransformer,TemplateTransformer,NumberFormatTransformer">
        <!-- XXX: This should be $home/$post_date/$post_name/ -->
        <field name="url" column="guid" />
        <!-- XXX: Will always be 0, because the WHERE clause above only
             selects rows with post_status='publish' -->
        <field column="deleted" />
        <!-- TemplateTransformer: the table prefix of the blog this post came from -->
        <field column="source" template="${blogtables.blog}" />
        <!-- NumberFormatTransformer: parse the post ID as a number -->
        <field name="sourceID" column="ID" formatStyle="number" />
        <field name="pubdate" column="post_date_gmt" />
        <field name="moddate" column="post_modified_gmt" />
        <!-- The blog's 'home' option with a trailing slash, copied into two fields -->
        <entity name="home"
                query="SELECT CONCAT(option_value, '/') AS section FROM ${blogtables.blog}_options WHERE option_name='home'">
          <field name="homesection" column="section" />
          <field name="sections" column="section" />
        </entity>
        <field name="headline" column="post_title" />
        <!-- HTMLStripTransformer: index the post content without markup -->
        <field name="body" column="post_content" stripHTML="true" />
        <!-- Display name of the post's author, looked up by post_author -->
        <entity name="author"
                query="SELECT display_name FROM ${blogtables.blog}_users WHERE ID=${blog.post_author}">
          <field name="authors" column="display_name" />
        </entity>
      </entity> <!-- /name: blog -->
    </entity> <!-- /name: blogtables -->
  </document> <!-- /name: wordpress -->
</dataConfig>
Add Comment
Please, Sign In to add comment