Advertisement
Guest User

Untitled

a guest
Oct 11th, 2016
521
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 42.23 KB | None | 0 0
  1. <?xml version="1.0" encoding="UTF-8" ?>
  2. <schema name="example-data-driven-schema" version="1.6">
  3.  
  <!-- Application fields. id, brand_id and type are required; type also
       declares a default of "0", and both name fields default to "". -->
  <field name="id"         type="string"       required="true" multiValued="false" stored="true" indexed="true"/>
  <field name="name"       type="text_general" default=""      stored="true" indexed="true"/>
  <field name="brand_id"   type="string"       required="true" multiValued="false" stored="true" indexed="true"/>
  <field name="brand_name" type="text_general" default=""      stored="true" indexed="true"/>
  <field name="type"       type="string"       required="true" default="0" stored="true" indexed="true"/>
  9.  
  <!-- Bookkeeping fields: all three are indexed but not stored. _text_ is a
       multi-valued catch-all that the copyField rule fills from every
       source field. -->
  <field name="_version_" type="long"         stored="false" indexed="true"/>
  <field name="_root_"    type="string"       stored="false" indexed="true" docValues="false"/>
  <field name="_text_"    type="text_general" stored="false" indexed="true" multiValued="true"/>
  <copyField dest="_text_" source="*"/>
  14.  
  <!-- Suffix-driven dynamic fields, all indexed and stored. Each singular
       suffix is paired with a plural suffix mapped to the plural type. -->
  <dynamicField name="*_i"   type="int"          stored="true" indexed="true"/>
  <dynamicField name="*_is"  type="ints"         stored="true" indexed="true"/>
  <dynamicField name="*_s"   type="string"       stored="true" indexed="true"/>
  <dynamicField name="*_ss"  type="strings"      stored="true" indexed="true"/>
  <dynamicField name="*_l"   type="long"         stored="true" indexed="true"/>
  <dynamicField name="*_ls"  type="longs"        stored="true" indexed="true"/>
  <dynamicField name="*_t"   type="text_general" stored="true" indexed="true"/>
  <dynamicField name="*_txt" type="text_general" stored="true" indexed="true"/>
  <dynamicField name="*_b"   type="boolean"      stored="true" indexed="true"/>
  <dynamicField name="*_bs"  type="booleans"     stored="true" indexed="true"/>
  <dynamicField name="*_f"   type="float"        stored="true" indexed="true"/>
  <dynamicField name="*_fs"  type="floats"       stored="true" indexed="true"/>
  <dynamicField name="*_d"   type="double"       stored="true" indexed="true"/>
  <dynamicField name="*_ds"  type="doubles"      stored="true" indexed="true"/>
  29.  
  <!-- Type used to index the lat and lon components of the "location" fieldType.
       Indexed only: not stored, and docValues are not returned as stored values. -->
  <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" useDocValuesAsStored="false"/>

  <!-- Date and spatial suffixes. *_dts maps to the multi-valued "dates" type so it
       follows the same singular/plural type pairing as the other plural suffixes
       (for example *_is with ints, *_tdts with tdates). "dates" is declared with the
       same class and settings as "date" plus multiValued="true". -->
  <dynamicField name="*_dt"   type="date"         indexed="true" stored="true"/>
  <dynamicField name="*_dts"  type="dates"        indexed="true" stored="true"/>
  <dynamicField name="*_p"    type="location"     indexed="true" stored="true"/>
  <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/>
  37.  
  <!-- Trie-coded dynamic fields intended for faster range queries; every one
       is indexed and stored. -->
  <dynamicField name="*_ti"   type="tint"     stored="true" indexed="true"/>
  <dynamicField name="*_tis"  type="tints"    stored="true" indexed="true"/>
  <dynamicField name="*_tl"   type="tlong"    stored="true" indexed="true"/>
  <dynamicField name="*_tls"  type="tlongs"   stored="true" indexed="true"/>
  <dynamicField name="*_tf"   type="tfloat"   stored="true" indexed="true"/>
  <dynamicField name="*_tfs"  type="tfloats"  stored="true" indexed="true"/>
  <dynamicField name="*_td"   type="tdouble"  stored="true" indexed="true"/>
  <dynamicField name="*_tds"  type="tdoubles" stored="true" indexed="true"/>
  <dynamicField name="*_tdt"  type="tdate"    stored="true" indexed="true"/>
  <dynamicField name="*_tdts" type="tdates"   stored="true" indexed="true"/>
  49.  
  <!-- Currency amounts. -->
  <dynamicField name="*_c" type="currency" stored="true" indexed="true"/>

  <!-- Prefix-driven fields: ignored_* uses the "ignored" type, attr_* is
       multi-valued general text, random_* uses the "random" type. -->
  <dynamicField name="ignored_*" multiValued="true" type="ignored"/>
  <dynamicField name="attr_*"    multiValued="true" type="text_general" stored="true" indexed="true"/>
  <dynamicField name="random_*"  type="random"/>

  <!-- The document key is the "id" field. -->
  <uniqueKey>id</uniqueKey>
  59.  
  <!-- Untokenized string types, with docValues, sorting missing values last. -->
  <fieldType name="string"  docValues="true" sortMissingLast="true" class="solr.StrField"/>
  <fieldType name="strings" docValues="true" sortMissingLast="true" multiValued="true" class="solr.StrField"/>

  <!-- Boolean types: "true" or "false". -->
  <fieldType name="boolean"  sortMissingLast="true" class="solr.BoolField"/>
  <fieldType name="booleans" sortMissingLast="true" multiValued="true" class="solr.BoolField"/>
  67.  
  <!-- Numeric types with precisionStep="0", single-valued then multi-valued. -->
  <fieldType name="int"     class="solr.TrieIntField"    positionIncrementGap="0" precisionStep="0" docValues="true"/>
  <fieldType name="float"   class="solr.TrieFloatField"  positionIncrementGap="0" precisionStep="0" docValues="true"/>
  <fieldType name="long"    class="solr.TrieLongField"   positionIncrementGap="0" precisionStep="0" docValues="true"/>
  <fieldType name="double"  class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="0" docValues="true"/>

  <fieldType name="ints"    class="solr.TrieIntField"    positionIncrementGap="0" precisionStep="0" docValues="true" multiValued="true"/>
  <fieldType name="floats"  class="solr.TrieFloatField"  positionIncrementGap="0" precisionStep="0" docValues="true" multiValued="true"/>
  <fieldType name="longs"   class="solr.TrieLongField"   positionIncrementGap="0" precisionStep="0" docValues="true" multiValued="true"/>
  <fieldType name="doubles" class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="0" docValues="true" multiValued="true"/>

  <!-- Same classes with precisionStep="8" (the "t" prefixed variants). -->
  <fieldType name="tint"    class="solr.TrieIntField"    positionIncrementGap="0" precisionStep="8" docValues="true"/>
  <fieldType name="tfloat"  class="solr.TrieFloatField"  positionIncrementGap="0" precisionStep="8" docValues="true"/>
  <fieldType name="tlong"   class="solr.TrieLongField"   positionIncrementGap="0" precisionStep="8" docValues="true"/>
  <fieldType name="tdouble" class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="8" docValues="true"/>

  <fieldType name="tints"    class="solr.TrieIntField"    positionIncrementGap="0" precisionStep="8" docValues="true" multiValued="true"/>
  <fieldType name="tfloats"  class="solr.TrieFloatField"  positionIncrementGap="0" precisionStep="8" docValues="true" multiValued="true"/>
  <fieldType name="tlongs"   class="solr.TrieLongField"   positionIncrementGap="0" precisionStep="8" docValues="true" multiValued="true"/>
  <fieldType name="tdoubles" class="solr.TrieDoubleField" positionIncrementGap="0" precisionStep="8" docValues="true" multiValued="true"/>
  87.  
  <!-- Date types with precisionStep="0". -->
  <fieldType name="date"  class="solr.TrieDateField" positionIncrementGap="0" precisionStep="0" docValues="true"/>
  <fieldType name="dates" class="solr.TrieDateField" positionIncrementGap="0" precisionStep="0" docValues="true" multiValued="true"/>

  <!-- Trie-based date types (precisionStep="6") for faster date range queries
       and date faceting. -->
  <fieldType name="tdate"  class="solr.TrieDateField" positionIncrementGap="0" precisionStep="6" docValues="true"/>
  <fieldType name="tdates" class="solr.TrieDateField" positionIncrementGap="0" precisionStep="6" docValues="true" multiValued="true"/>

  <!-- Raw binary values. -->
  <fieldType name="binary" class="solr.BinaryField"/>

  <!-- Backed by RandomSortField. -->
  <fieldType name="random" indexed="true" class="solr.RandomSortField"/>
  99.  
  <!-- Text split by the whitespace tokenizer only; no further filtering. -->
  <dynamicField name="*_ws" type="text_ws" stored="true" indexed="true"/>
  <fieldType name="text_ws" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    </analyzer>
  </fieldType>
  106.  
  <!-- Index side: LowerCaseTokenizer output expanded into n-grams of 2 to 25
       characters. Query side: the same tokenizer with no n-gramming. -->
  <fieldType class="solr.TextField" name="text">
    <analyzer type="index">
      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
      <filter class="solr.NGramFilterFactory" maxGramSize="25" minGramSize="2"/>
      <!-- Disabled alternative:
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25" />
      -->
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
    </analyzer>
  </fieldType>
  117.  
  <!-- General-purpose multi-valued text. Index side: standard tokenizer, stop
       words from stopwords.txt, lowercasing, then n-grams of 2 to 25 characters.
       Query side: stop words, synonyms from synonyms.txt and lowercasing, with
       no n-gramming.
       NOTE(review): because n-grams exist only on the index side, whole query
       terms outside the 2..25 character range presumably cannot match; confirm
       against the NGramFilterFactory behaviour of the deployed Solr version. -->
  <fieldType name="text_general" multiValued="true" positionIncrementGap="100" class="solr.TextField">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.NGramFilterFactory" maxGramSize="25" minGramSize="2"/>
      <!-- Disabled alternative:
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25" />
      -->
    </analyzer>

    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  134.  
  <!-- English text: StandardTokenizer, English stop word removal
       (lang/stopwords_en.txt), lowercasing, possessive stripping, protection of
       the words listed in protwords.txt, then Porter stemming. Synonyms from
       synonyms.txt are applied on the query side only. -->
  <dynamicField name="*_txt_en" type="text_en" stored="true" indexed="true"/>
  <fieldType name="text_en" positionIncrementGap="100" class="solr.TextField">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- Index-time synonyms are disabled; synonyms are applied at query time only.
      <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
      -->
      <!-- Case-insensitive stop word removal. -->
      <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <!-- A less aggressive stemmer that can replace PorterStemFilterFactory:
      <filter class="solr.EnglishMinimalStemFilterFactory"/>
      -->
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <!-- A less aggressive stemmer that can replace PorterStemFilterFactory:
      <filter class="solr.EnglishMinimalStemFilterFactory"/>
      -->
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>
  177.  
  <!-- English text with aggressive word splitting and auto-generated phrase
       queries. Like text_en, but tokenizes on whitespace and adds
       WordDelimiterFilter so words are split and matched on case changes,
       alphanumeric boundaries and non-alphanumeric characters. Certain compound
       words therefore work: the query "wi fi" matches a document containing
       "WiFi" or "wi-fi". -->
  <dynamicField name="*_txt_en_split" type="text_en_splitting" stored="true" indexed="true"/>
  <fieldType name="text_en_splitting"
             class="solr.TextField"
             autoGeneratePhraseQueries="true"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <!-- Index-time synonyms are disabled; synonyms are applied at query time only.
      <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
      -->
      <!-- Case-insensitive stop word removal. -->
      <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <!-- Index side also catenates word and number parts. -->
      <filter class="solr.WordDelimiterFilterFactory"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <!-- Query side generates parts but does not catenate. -->
      <filter class="solr.WordDelimiterFilterFactory"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>
  218.  
  <!-- Less flexible matching, but fewer false matches. Probably not ideal for
       product names, but may be good for SKUs. Dashes can be inserted in the
       wrong place and the value will still match. -->
  <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" stored="true" indexed="true"/>
  <fieldType name="text_en_splitting_tight"
             class="solr.TextField"
             autoGeneratePhraseQueries="true"
             positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory"
              generateWordParts="0"
              generateNumberParts="0"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.EnglishMinimalStemFilterFactory"/>
      <!-- Removes duplicate tokens that appear at the same position, which is
           sometimes possible with WordDelimiterFilter in conjunction with stemming. -->
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  236.  
  <!-- General text whose index side also reverses the characters of each token
       (keeping the original), to enable more efficient leading wildcard queries.
       The query side does not reverse tokens. -->
  <dynamicField name="*_txt_rev" type="text_general_rev" stored="true" indexed="true"/>
  <fieldType name="text_general_rev" positionIncrementGap="100" class="solr.TextField">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ReversedWildcardFilterFactory"
              withOriginal="true"
              maxPosAsterisk="3"
              maxPosQuestion="2"
              maxFractionAsterisk="0.33"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  256.  
  <!-- Phonetic matching via DoubleMetaphoneFilterFactory with inject="false".
       The type declares stored="false"; the *_phon_en dynamic field sets
       stored="true" on the field itself. -->
  <dynamicField name="*_phon_en" type="phonetic_en" stored="true" indexed="true"/>
  <fieldType name="phonetic_en" class="solr.TextField" indexed="true" stored="false">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
    </analyzer>
  </fieldType>
  264.  
  <!-- Keeps the whole field value as a single token and lowercases it. -->
  <dynamicField name="*_s_lower" type="lowercase" stored="true" indexed="true"/>
  <fieldType name="lowercase" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  273.  
  <!-- PathHierarchyTokenizerFactory applied at index time: a query for a path
       matches documents at that path or in descendent paths. -->
  <dynamicField name="*_descendent_path" type="descendent_path" stored="true" indexed="true"/>
  <fieldType class="solr.TextField" name="descendent_path">
    <analyzer type="index">
      <tokenizer delimiter="/" class="solr.PathHierarchyTokenizerFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
    </analyzer>
  </fieldType>
  287.  
  <!-- PathHierarchyTokenizerFactory applied at query time: a query for a path
       matches documents at that path or in ancestor paths. -->
  <dynamicField name="*_ancestor_path" type="ancestor_path" stored="true" indexed="true"/>
  <fieldType class="solr.TextField" name="ancestor_path">
    <analyzer type="index">
      <tokenizer class="solr.KeywordTokenizerFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer delimiter="/" class="solr.PathHierarchyTokenizerFactory"/>
    </analyzer>
  </fieldType>
  301.  
  <!-- Fields of this type are neither stored nor indexed (and have no docValues)
       by default, so any data added to them is ignored outright. -->
  <fieldType name="ignored"
             class="solr.StrField"
             multiValued="true"
             indexed="false"
             stored="false"
             docValues="false"/>
  305.  
  <!-- Point type that indexes each coordinate in a separate sub-field.
         * With subFieldType, the value names a type and a dynamic field
           definition matching *___<typename> is created; for example
           subFieldType="double" indexes into myloc_0___double,myloc_1___double.
         * With subFieldSuffix, the suffix is used to create the sub-fields; for
           example subFieldSuffix="_d" indexes into myloc_0_d,myloc_1_d.
       The sub-fields are an implementation detail of the fieldType and end
       users normally should not need to know about them. -->
  <dynamicField name="*_point" type="point" stored="true" indexed="true"/>
  <fieldType name="point" subFieldSuffix="_d" dimension="2" class="solr.PointType"/>
  319.  
  <!-- Specialized geospatial search field. When indexed, a field of this type
       must not be multivalued. -->
  <fieldType name="location" subFieldSuffix="_coordinate" class="solr.LatLonType"/>
  322.  
  <!-- Alternative geospatial field type introduced in Solr 4; it supports
       multiValued fields and polygon shapes. For details on this and the other
       spatial fields added in Solr 4, see:
       http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 -->
  <fieldType name="location_rpt"
             class="solr.SpatialRecursivePrefixTreeFieldType"
             geo="true"
             distErrPct="0.025"
             maxDistErr="0.001"
             distanceUnits="kilometers"/>
  329.  
  <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
       Parameters:
         defaultCurrency: default currency when none is specified. Defaults to "USD"
         precisionStep:   precisionStep for the TrieLong field used for the amount
         providerClass:   plugs in another exchange rate provider backend:
           solr.FileExchangeRateProvider is the default and takes one parameter:
             currencyConfig: name of an xml file holding exchange rates
           solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
             ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
             refreshInterval: number of minutes between each rates fetch (default: 1440, min: 60)
  -->
  <fieldType name="currency"
             class="solr.CurrencyField"
             defaultCurrency="USD"
             currencyConfig="currency.xml"
             precisionStep="8"/>
  342.  
  343.  
  344.  
  345. <!-- some examples for different languages (generally ordered by ISO code) -->
  346.  
  <!-- Arabic: lowercasing, stop words, Arabic normalization and stemming. -->
  <dynamicField name="*_txt_ar" type="text_ar" stored="true" indexed="true"/>
  <fieldType name="text_ar" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- lowercasing covers any non-Arabic text -->
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_ar.txt" ignoreCase="true"/>
      <!-- normalizes ﻯ to ﻱ, etc -->
      <filter class="solr.ArabicNormalizationFilterFactory"/>
      <filter class="solr.ArabicStemFilterFactory"/>
    </analyzer>
  </fieldType>
  360.  
  <!-- Bulgarian: lowercasing, stop words, Bulgarian stemming. -->
  <dynamicField name="*_txt_bg" type="text_bg" stored="true" indexed="true"/>
  <fieldType name="text_bg" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_bg.txt" ignoreCase="true"/>
      <filter class="solr.BulgarianStemFilterFactory"/>
    </analyzer>
  </fieldType>
  371.  
  <!-- Catalan: elision removal, lowercasing, stop words, Snowball stemming. -->
  <dynamicField name="*_txt_ca" type="text_ca" stored="true" indexed="true"/>
  <fieldType name="text_ca" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- strips elided articles such as l' -->
      <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ca.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_ca.txt" ignoreCase="true"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
    </analyzer>
  </fieldType>
  384.  
  <!-- CJK bigrams (see text_ja for a Japanese configuration that uses
       morphological analysis). -->
  <dynamicField name="*_txt_cjk" type="text_cjk" stored="true" indexed="true"/>
  <fieldType name="text_cjk" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- width is normalized before bigramming, as e.g. half-width dakuten combine -->
      <filter class="solr.CJKWidthFilterFactory"/>
      <!-- lowercasing covers any non-CJK text -->
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.CJKBigramFilterFactory"/>
    </analyzer>
  </fieldType>
  397.  
  <!-- Czech: lowercasing, stop words, Czech stemming. -->
  <dynamicField name="*_txt_cz" type="text_cz" stored="true" indexed="true"/>
  <fieldType name="text_cz" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_cz.txt" ignoreCase="true"/>
      <filter class="solr.CzechStemFilterFactory"/>
    </analyzer>
  </fieldType>
  408.  
  <!-- Danish: lowercasing, snowball-format stop word list, Snowball stemming. -->
  <dynamicField name="*_txt_da" type="text_da" stored="true" indexed="true"/>
  <fieldType name="text_da" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_da.txt" format="snowball" ignoreCase="true"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
    </analyzer>
  </fieldType>
  419.  
  <!-- German: lowercasing, snowball-format stop word list, German
       normalization, light stemming. -->
  <dynamicField name="*_txt_de" type="text_de" stored="true" indexed="true"/>
  <fieldType name="text_de" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_de.txt" format="snowball" ignoreCase="true"/>
      <filter class="solr.GermanNormalizationFilterFactory"/>
      <filter class="solr.GermanLightStemFilterFactory"/>
      <!-- less aggressive option: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
      <!-- more aggressive option: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
    </analyzer>
  </fieldType>
  433.  
  <!-- Greek: Greek-specific lowercasing, case-sensitive stop word removal,
       Greek stemming. -->
  <dynamicField name="*_txt_el" type="text_el" stored="true" indexed="true"/>
  <fieldType name="text_el" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- Greek-specific lowercasing, needed for sigma -->
      <filter class="solr.GreekLowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_el.txt" ignoreCase="false"/>
      <filter class="solr.GreekStemFilterFactory"/>
    </analyzer>
  </fieldType>
  445.  
  <!-- Spanish: lowercasing, snowball-format stop word list, light stemming. -->
  <dynamicField name="*_txt_es" type="text_es" stored="true" indexed="true"/>
  <fieldType name="text_es" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_es.txt" format="snowball" ignoreCase="true"/>
      <filter class="solr.SpanishLightStemFilterFactory"/>
      <!-- more aggressive option: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
    </analyzer>
  </fieldType>
  457.  
  <!-- Basque: lowercasing, stop words, Snowball stemming. -->
  <dynamicField name="*_txt_eu" type="text_eu" stored="true" indexed="true"/>
  <fieldType name="text_eu" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_eu.txt" ignoreCase="true"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
    </analyzer>
  </fieldType>
  468.  
  <!-- Persian: Persian char filter, lowercasing, Arabic and Persian
       normalization, then stop word removal. No stemmer is configured. -->
  <dynamicField name="*_txt_fa" type="text_fa" stored="true" indexed="true"/>
  <fieldType name="text_fa" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <!-- char filter that handles ZWNJ -->
      <charFilter class="solr.PersianCharFilterFactory"/>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ArabicNormalizationFilterFactory"/>
      <filter class="solr.PersianNormalizationFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_fa.txt" ignoreCase="true"/>
    </analyzer>
  </fieldType>
  482.  
  <!-- Finnish: lowercasing, snowball-format stop word list, Snowball stemming. -->
  <dynamicField name="*_txt_fi" type="text_fi" stored="true" indexed="true"/>
  <fieldType name="text_fi" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_fi.txt" format="snowball" ignoreCase="true"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
      <!-- less aggressive option: <filter class="solr.FinnishLightStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  494.  
  <!-- French: elision removal, lowercasing, snowball-format stop word list,
       light stemming. -->
  <dynamicField name="*_txt_fr" type="text_fr" stored="true" indexed="true"/>
  <fieldType name="text_fr" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- strips elided articles such as l' -->
      <filter class="solr.ElisionFilterFactory" articles="lang/contractions_fr.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_fr.txt" format="snowball" ignoreCase="true"/>
      <filter class="solr.FrenchLightStemFilterFactory"/>
      <!-- less aggressive option: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
      <!-- more aggressive option: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
    </analyzer>
  </fieldType>
  509.  
  <!-- Irish: elision removal, hyphenation-prefix removal, Irish-specific
       lowercasing, stop words, Snowball stemming. -->
  <dynamicField name="*_txt_ga" type="text_ga" stored="true" indexed="true"/>
  <fieldType name="text_ga" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- strips elided forms such as d' -->
      <filter class="solr.ElisionFilterFactory" articles="lang/contractions_ga.txt" ignoreCase="true"/>
      <!-- removes n-, etc. using the entries of lang/hyphenations_ga.txt.
           NOTE(review): the earlier comment here said position increments are
           intentionally false, but no such attribute is set on this filter; confirm
           whether that still applies to the deployed Solr version. -->
      <filter class="solr.StopFilterFactory" words="lang/hyphenations_ga.txt" ignoreCase="true"/>
      <filter class="solr.IrishLowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_ga.txt" ignoreCase="true"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
    </analyzer>
  </fieldType>
  524.  
  <!-- Galician: lowercasing, stop words, Galician stemming. -->
  <dynamicField name="*_txt_gl" type="text_gl" stored="true" indexed="true"/>
  <fieldType name="text_gl" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_gl.txt" ignoreCase="true"/>
      <filter class="solr.GalicianStemFilterFactory"/>
      <!-- less aggressive option: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  536.  
  <!-- Hindi: lowercasing, Indic and Hindi normalization, stop words, Hindi
       stemming. -->
  <dynamicField name="*_txt_hi" type="text_hi" stored="true" indexed="true"/>
  <fieldType name="text_hi" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- normalizes the unicode representation -->
      <filter class="solr.IndicNormalizationFilterFactory"/>
      <!-- normalizes spelling variation -->
      <filter class="solr.HindiNormalizationFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_hi.txt" ignoreCase="true"/>
      <filter class="solr.HindiStemFilterFactory"/>
    </analyzer>
  </fieldType>
  551.  
  <!-- Hungarian: lowercasing, snowball-format stop word list, Snowball stemming. -->
  <dynamicField name="*_txt_hu" type="text_hu" stored="true" indexed="true"/>
  <fieldType name="text_hu" positionIncrementGap="100" class="solr.TextField">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" words="lang/stopwords_hu.txt" format="snowball" ignoreCase="true"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
      <!-- less aggressive option: <filter class="solr.HungarianLightStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  563.  
  <!-- Armenian: dynamic fields named *_txt_hy use the text_hy analysis chain defined below. -->
  <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/>
  <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- Stop words are read from lang/stopwords_hy.txt and matched case-insensitively. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt"/>
      <!-- Snowball stemmer configured for Armenian. -->
      <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
    </analyzer>
  </fieldType>
  574.  
  <!-- Indonesian: dynamic fields named *_txt_id use the text_id analysis chain defined below. -->
  <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/>
  <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt"/>
      <!-- stemDerivational="true" is the more aggressive setting; set it to "false" to strip
           inflectional suffixes only. -->
      <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
    </analyzer>
  </fieldType>
  586.  
  <!-- Italian: dynamic fields named *_txt_it use the text_it analysis chain defined below. -->
  <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/>
  <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- Strips elided articles such as l' using the list in lang/contractions_it.txt. -->
      <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- The stop-word file lang/stopwords_it.txt is declared as Snowball format. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball"/>
      <filter class="solr.ItalianLightStemFilterFactory"/>
      <!-- More aggressive alternative: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
    </analyzer>
  </fieldType>
  600.  
  <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming).

       NOTE: To optimize search for precision, use AND as the default query operator; to
       optimize for recall, use OR (the default). No <solrQueryParser defaultOperator="AND"/>
       element follows in this file, so set the operator with the q.op request parameter
       instead (per request, or as a request-handler default in solrconfig.xml).
  -->
  <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/>
  <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
    <analyzer>
      <!-- Tokenizer: Kuromoji Japanese morphological analyzer (JapaneseTokenizer).

           The "mode" attribute accepts:
             normal   : regular segmentation
             search   : segmentation useful for search (default); a heuristic splits compounds
                        into their parts and the compound itself is kept as a synonym
             extended : same as search mode, but also unigrams unknown words (experimental)

           For some applications it helps to index in search mode and query in normal mode,
           which reduces recall and prevents parts of compounds from being matched and
           highlighted. To do so, declare separate <analyzer type="index"> and
           <analyzer type="query"> sections and use mode="normal" in the query analyzer.

           A user dictionary can override the statistical model with your own entries for
           segmentation, part-of-speech tags and readings, without specifying weights.
           Note that user dictionaries have not been subject to extensive testing.
             userDictionary         : user dictionary filename
             userDictionaryEncoding : user dictionary encoding (default is UTF-8)
           See lang/userdict_ja.txt for a sample user dictionary file.

           Punctuation characters are discarded by default; use discardPunctuation="false"
           to keep them.

           See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese
           language support.
      -->
      <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
      <!-- Variant with a user dictionary:
           <tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/> -->

      <!-- Inflected verbs and adjectives are reduced to their base/dictionary forms (辞書形). -->
      <filter class="solr.JapaneseBaseFormFilterFactory"/>
      <!-- Tokens whose part-of-speech tag is listed in lang/stoptags_ja.txt are removed. -->
      <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt"/>
      <!-- Full-width romaji become half-width and half-width kana become full-width
           (Unicode NFKC subset). -->
      <filter class="solr.CJKWidthFilterFactory"/>
      <!-- Common tokens that are rarely useful for search but hurt ranking are removed. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt"/>
      <!-- Common katakana spelling variations are normalized by dropping a trailing long
           sound character (U+30FC). -->
      <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
      <!-- Romaji characters are lower-cased as the final step. -->
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  654.  
  <!-- Latvian: dynamic fields named *_txt_lv use the text_lv analysis chain defined below. -->
  <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
  <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- Stop words are read from lang/stopwords_lv.txt and matched case-insensitively. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt"/>
      <filter class="solr.LatvianStemFilterFactory"/>
    </analyzer>
  </fieldType>
  665.  
  <!-- Dutch: dynamic fields named *_txt_nl use the text_nl analysis chain defined below. -->
  <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/>
  <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- The stop-word file lang/stopwords_nl.txt is declared as Snowball format. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball"/>
      <!-- Custom stems from lang/stemdict_nl.txt are applied ahead of the Snowball stemmer;
           dictionary lookups are case-sensitive (ignoreCase="false"). -->
      <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
    </analyzer>
  </fieldType>
  677.  
  <!-- Norwegian: dynamic fields named *_txt_no use the text_no analysis chain defined below. -->
  <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/>
  <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- The stop-word file lang/stopwords_no.txt is declared as Snowball format. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
      <!-- Alternatives to the Snowball stemmer above:
             less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/>
             singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  690.  
  <!-- Portuguese: dynamic fields named *_txt_pt use the text_pt analysis chain defined below. -->
  <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/>
  <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- The stop-word file lang/stopwords_pt.txt is declared as Snowball format. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball"/>
      <filter class="solr.PortugueseLightStemFilterFactory"/>
      <!-- Alternatives to the light stemmer above:
             less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/>
             more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/>
             most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  704.  
  <!-- Romanian: dynamic fields named *_txt_ro use the text_ro analysis chain defined below. -->
  <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/>
  <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- Stop words are read from lang/stopwords_ro.txt and matched case-insensitively. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt"/>
      <!-- Snowball stemmer configured for Romanian. -->
      <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
    </analyzer>
  </fieldType>
  715.  
  <!-- Russian: dynamic fields named *_txt_ru use the text_ru analysis chain defined below. -->
  <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/>
  <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- The stop-word file lang/stopwords_ru.txt is declared as Snowball format. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
      <!-- Less aggressive alternative: <filter class="solr.RussianLightStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  727.  
  <!-- Swedish: dynamic fields named *_txt_sv use the text_sv analysis chain defined below. -->
  <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/>
  <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <!-- The stop-word file lang/stopwords_sv.txt is declared as Snowball format. -->
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
      <!-- Less aggressive alternative: <filter class="solr.SwedishLightStemFilterFactory"/> -->
    </analyzer>
  </fieldType>
  739.  
  <!-- Thai: dynamic fields named *_txt_th use the text_th analysis chain defined below. -->
  <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/>
  <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <!-- Uses the Thai-specific tokenizer instead of the standard one; no stemmer is configured. -->
      <tokenizer class="solr.ThaiTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt"/>
    </analyzer>
  </fieldType>
  749.  
  <!-- Turkish: dynamic fields named *_txt_tr use the text_tr analysis chain defined below. -->
  <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/>
  <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <!-- Turkish-specific lower-casing is used here rather than the generic LowerCaseFilterFactory. -->
      <filter class="solr.TurkishLowerCaseFilterFactory"/>
      <!-- Stop words are matched case-sensitively (ignoreCase="false"); tokens reaching this
           filter have already been lower-cased by the filter above. -->
      <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt"/>
      <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
    </analyzer>
  </fieldType>
  760.  
  761.  
  762.  
  763. </schema>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement