Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 2nd, 2012  |  syntax: None  |  size: 3.96 KB  |  hits: 13  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. set -v
  2. # Delete any previous index, if present
  3. curl -XDELETE 'localhost:9200/bugindex'
  4.  
  5.  
  6. # Create an index
  7. curl -XPUT 'localhost:9200/bugindex' -d '{
  8.         "settings" : {
  9.                 "index" : {
  10.                         "number_of_shards" : 1,
  11.                         "number_of_replicas" : 0
  12.                 }
  13.         }
  14. }'
  15.  
  16.  
  17. # Define a type
  18. curl -XPUT 'localhost:9200/bugindex/bugtype/_mapping' -d '{
  19.         "bugtype" : {
  20.                 "_source" :     { "enabled" : false },
  21.                 "_all" :        { "enabled" : true },
  22.                 "properties" : {
  23.                         "_all" :                { "type" : "string" ,           "index" : "analyzed" ,  "store" : "no" ,        "analyzer" : "whitespace"       },
  24.                         "in_all" :              { "type" : "string" ,           "index" : "analyzed" ,  "store" : "yes" ,       "analyzer" : "keyword"          },
  25.                         "simple" :              { "type" : "string" ,           "index" : "analyzed" ,  "store" : "yes" ,       "analyzer" : "whitespace"       },
  26.                         "multi" :               { "type" : "multi_field" ,
  27.                                 "fields" : {
  28.                                         "multi" :       { "type" : "string" ,   "index" : "no" ,                "store" : "yes" ,       "include_in_all" : "no" },
  29.                                         "exact" :       { "type" : "string" ,   "index" : "analyzed" ,  "store" : "no" ,        "analyzer" : "whitespace" ,     "term_vector" : "with_positions_offsets"        },
  30.                                         "english" :     { "type" : "string" ,   "index" : "analyzed" ,  "store" : "no" ,        "analyzer" : "english" ,        "term_vector" : "with_positions_offsets"        }
  31.                                 }
  32.                         }
  33.                 }
  34.         }
  35. }'
  36.  
  37.  
  38. # Index a single test doc
  39. curl -XPUT 'localhost:9200/bugindex/bugtype/1' -d '{
  40.         "in_all" : "key word 1",
  41.         "simple" : "ab bc cd",
  42.         "multi" : "out of the box"
  43. }'
  44.  
  45.  
  46. # Make it available for search
  47. curl -XPOST 'localhost:9200/bugindex/_optimize?refresh=true&flush=true'
  48.  
  49.  
  50.  
  51.  
  52. # Works as intended
  53. curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
  54.         "query" : {
  55.                 "term" : {
  56.                         "simple" : "bc"
  57.                 }
  58.         },
  59.         "highlight" : {
  60.                 "fields" : {
  61.                         "simple" : {
  62.                                 "number_of_fragments" : 0
  63.                         }
  64.                 }
  65.         }
  66. }'
  67.  
  68.  
  69. # Works as intended
  70. curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
  71.         "query" : {
  72.                 "term" : {
  73.                         "in_all" : "key word 1"
  74.                 }
  75.         },
  76.         "highlight" : {
  77.                 "fields" : {
  78.                         "in_all" : {
  79.                                 "number_of_fragments":0
  80.                         }
  81.                 }
  82.         }
  83. }'
  84.  
  85.  
  86. # Search works so-so (depends on the following); highlighting works only if:
  87. #  - either _source is enabled
  88. #  - or     multi.exact is stored
  89. # Neither of the two is acceptable (index size is already a bit too large,
  90. #  we have _source disabled and all fields stored,
  91. #  as disk seeks are not so problematic with SSD)
  92. curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
  93.         "query" : {
  94.                 "term" : {
  95.                         "multi.exact" : "box"
  96.                 }
  97.         },
  98.         "highlight" : {
  99.                 "fields" : {
  100.                         "multi.exact" : {
  101.                                 "number_of_fragments":0
  102.                         }
  103.                 }
  104.         }
  105. }'
  106.  
  107.  
  108. # Search ok, highlight INEXISTENT
  109. # (searching against multi (same field as the highlighted one)
  110. #  won't work because it's not indexed)
  111. curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
  112.         "query" : {
  113.                 "term" : {
  114.                         "multi.exact" : "box"
  115.                 }
  116.         },
  117.         "highlight" : {
  118.                 "fields" : {
  119.                         "multi" : {
  120.                                 "number_of_fragments":0
  121.                         }
  122.                 }
  123.         }
  124. }'
  125.  
  126.  
  127.  
  128.  
  129. # It feels like we should:
  130. #  - Use a multi-field to analyze a single field in different manners
  131. #  - Index only the subfields
  132. #  - Store only the "main" subfield,
  133. #    as it would result in storing multiple times the exact same data otherwise
  134. #  - Query against one subfield (not the "main" subfield)
  135. # However with highlighting we would like to
  136. #  - highlight against the multi-field (ie the "main" subfield)
  137. #  - group highlights of searches against all the subfields
  138. #    (I agree that not everybody may wish for this, though)
  139. # A solution would consist of either:
  140. #  - Accessing the "main" field's value (either stored or from source)
  141. #    when trying to access the subfield's value.
  142. #  - Not using multi-fields and using special analyzers that would
  143. #    multiplex the terms from multiple sub-analyzers
  144. # Implementing this last solution may be beneficial also for:
  145. #  - Text search where you may wish to index exact, normalized and stemmed
  146. #    terms in the same field index.
  147. #  - Group analyzed terms from multiple fields into a kind of _all_terms field
  148. #    which would leverage differentiated analyzers and the speed of searching
  149. #    against a single field index.