Advertisement
Guest User

Untitled

a guest
Aug 14th, 2017
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.41 KB | None | 0 0
  1. <!--
  2. =================================================================
  3. Nutch Solr Simple Text Core schema
  4. This xml contains Solr5.2.1 schema definitions for being
  5. able to crawl web pages with Nutch and then post to Solr for
  6. indexing and to access our search functionality.
  7. Authors: Apache Foundation, Camilo Tejeiro
  8. License: Apache V2. (Refer to root dir for all licenses)
  9. =================================================================
  10. -->
  11. <!--
  12.  
  13. Description: This document contains Solr5.2.1 schema definitions
  14. for correct integration with Nutch 1.10
  15. -->
  <schema name="nutch" version="1.5">

    <!-- Field type definitions -->
    <types>

      <!-- String types (solr.StrField) -->
      <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
      <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/>

      <!-- Boolean types (solr.BoolField): hold true or false -->
      <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
      <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>

      <!--
        Numeric and date types.
        The plain names use precisionStep="0". The "t"-prefixed names use a
        non-zero precisionStep; consider those for faster range queries.
        A trailing "s" marks the multiValued variant of each type.
      -->

      <!-- Integer types (solr.TrieIntField) -->
      <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="ints" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"
                 multiValued="true"/>
      <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"
                 multiValued="true"/>

      <!-- Long types (solr.TrieLongField) -->
      <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="longs" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"
                 multiValued="true"/>
      <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"
                 multiValued="true"/>

      <!-- Floating point types (solr.TrieFloatField) -->
      <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="floats" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"
                 multiValued="true"/>
      <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"
                 multiValued="true"/>

      <!-- Double types (solr.TrieDoubleField) -->
      <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="doubles" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"
                 multiValued="true"/>
      <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
      <fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"
                 multiValued="true"/>

      <!-- Date types (solr.TrieDateField) -->
      <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
      <fieldType name="dates" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"
                 multiValued="true"/>
      <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
      <fieldType name="tdates" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"
                 multiValued="true"/>

      <!--
        General-purpose searchable English text ("text enabled for search").
        Used for the aggregated default_search_field and for page content.
        The index-time and query-time analyzers are deliberately identical so
        that query terms are normalized the same way as indexed terms.
      -->
      <fieldType name="text_en_search" class="solr.TextField" positionIncrementGap="100"
                 multiValued="true">
        <!-- Analysis applied when documents are indexed -->
        <analyzer type="index">
          <!-- Break the text apart into tokens with the StandardTokenizer -->
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- Drop stop words listed in stopwords.txt, ignoring case -->
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
          <!-- Lowercase the remaining tokens so matching is case-insensitive -->
          <filter class="solr.LowerCaseFilterFactory"/>
        </analyzer>
        <!-- Analysis applied to search keywords at query time -->
        <analyzer type="query">
          <!-- Break the search keywords apart into tokens with the StandardTokenizer -->
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <!-- Drop stop words listed in stopwords.txt, ignoring case -->
          <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
          <!-- Lowercase the remaining tokens so matching is case-insensitive -->
          <filter class="solr.LowerCaseFilterFactory"/>
        </analyzer>
      </fieldType>

      <!--
        URL text: tokenized, lowercased, then split into word and number parts.
        A single analyzer (no type attribute) serves both indexing and querying.
      -->
      <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
        <analyzer>
          <tokenizer class="solr.StandardTokenizerFactory"/>
          <filter class="solr.LowerCaseFilterFactory"/>
          <filter class="solr.WordDelimiterFilterFactory"
                  generateWordParts="1"
                  generateNumberParts="1"/>
        </analyzer>
      </fieldType>

      <!--
        Latitude/longitude type. Its sub-fields are named with the
        "_coordinate" suffix, so it relies on the *_coordinate dynamicField
        declared in the fields section below.
      -->
      <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
    </types>

    <!-- Field definitions -->
    <fields>

      <!-- Solr recommended/required fields -->
      <field name="id" type="string" stored="true" indexed="true" required="true"/>
      <field name="_version_" type="long" stored="true" indexed="true"/>

      <!-- Aggregated, searchable copy of the other fields (filled by the copyField below) -->
      <field name="default_search_field" type="text_en_search" stored="false" indexed="true"
             multiValued="true"/>

      <!-- Nutch core fields -->
      <field name="segment" type="string" stored="true" indexed="false"/>
      <field name="boost" type="float" stored="true" indexed="false"/>
      <!-- digest is neither stored nor indexed in this core -->
      <field name="digest" type="string" stored="false" indexed="false"/>

      <!-- Nutch fields for the index-basic plugin -->
      <field name="host" type="string" stored="true" indexed="false"/>
      <field name="title" type="string" stored="true" indexed="true"/>
      <field name="url" type="url" stored="false" indexed="true"/>
      <!--
        content is indexed as analyzed text rather than as a "string".
        A StrField indexes the whole value as one term, and full page content
        can exceed Lucene's maximum indexed term length (32766 bytes), which
        makes the document fail to index. Documents indexed under the old
        type must be re-indexed after this change.
      -->
      <field name="content" type="text_en_search" stored="false" indexed="true" multiValued="true"/>
      <!-- tstamp is neither stored nor indexed in this core -->
      <field name="tstamp" type="date" stored="false" indexed="false"/>

      <!-- Backing sub-fields for the "location" field type (subFieldSuffix="_coordinate") -->
      <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
    </fields>

    <!-- Field used to determine and enforce document uniqueness -->
    <uniqueKey>id</uniqueKey>

    <!--
      Copy every incoming field into the aggregated default search field.
      Note that the "*" source matches all fields, not only the indexed ones.
    -->
    <copyField source="*" dest="default_search_field"/>
  </schema>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement