- set -v  # echo each script line as the shell reads it
- # Delete any index left over from a previous run
- curl -XDELETE 'localhost:9200/bugindex'
- # Create the bugindex index with a single shard and no replicas
- curl -XPUT 'localhost:9200/bugindex' -d '{
- "settings" : {
- "index" : {
- "number_of_shards" : 1,
- "number_of_replicas" : 0
- }
- }
- }'
- # Define the bugtype mapping: _source disabled, _all enabled, and a multi_field named multi whose main sub-field is stored but not indexed, while its exact and english sub-fields are indexed (with term vectors) but not stored
- curl -XPUT 'localhost:9200/bugindex/bugtype/_mapping' -d '{
- "bugtype" : {
- "_source" : { "enabled" : false },
- "_all" : { "enabled" : true },
- "properties" : {
- "_all" : { "type" : "string" , "index" : "analyzed" , "store" : "no" , "analyzer" : "whitespace" },
- "in_all" : { "type" : "string" , "index" : "analyzed" , "store" : "yes" , "analyzer" : "keyword" },
- "simple" : { "type" : "string" , "index" : "analyzed" , "store" : "yes" , "analyzer" : "whitespace" },
- "multi" : { "type" : "multi_field" ,
- "fields" : {
- "multi" : { "type" : "string" , "index" : "no" , "store" : "yes" , "include_in_all" : "no" },
- "exact" : { "type" : "string" , "index" : "analyzed" , "store" : "no" , "analyzer" : "whitespace" , "term_vector" : "with_positions_offsets" },
- "english" : { "type" : "string" , "index" : "analyzed" , "store" : "no" , "analyzer" : "english" , "term_vector" : "with_positions_offsets" }
- }
- }
- }
- }
- }'
- # Index a single test document (id 1) with values for in_all, simple and multi
- curl -XPUT 'localhost:9200/bugindex/bugtype/1' -d '{
- "in_all" : "key word 1",
- "simple" : "ab bc cd",
- "multi" : "out of the box"
- }'
- # Make it available for search (optimize call with refresh and flush requested)
- curl -XPOST 'localhost:9200/bugindex/_optimize?refresh=true&flush=true'
- # Works as intended: term query on simple, highlighting requested on simple
- curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
- "query" : {
- "term" : {
- "simple" : "bc"
- }
- },
- "highlight" : {
- "fields" : {
- "simple" : {
- "number_of_fragments" : 0
- }
- }
- }
- }'
- # Works as intended: term query on in_all using the full field value, highlighting requested on in_all
- curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
- "query" : {
- "term" : {
- "in_all" : "key word 1"
- }
- },
- "highlight" : {
- "fields" : {
- "in_all" : {
- "number_of_fragments":0
- }
- }
- }
- }'
- # Search is so-so (depends on the following); highlighting works only if:
- # - either _source is enabled
- # - or multi.exact is stored
- # Neither of the two is acceptable (the index size is already a bit too large;
- # we have _source disabled and all fields stored,
- # as disk seeks are not much of a problem with SSDs)
- curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
- "query" : {
- "term" : {
- "multi.exact" : "box"
- }
- },
- "highlight" : {
- "fields" : {
- "multi.exact" : {
- "number_of_fragments":0
- }
- }
- }
- }'
- # Search OK, but the highlight is NONEXISTENT
- # (searching against multi (the same field as the highlighted one)
- # won't work because it's not indexed)
- curl -XGET 'localhost:9200/bugindex/bugtype/_search?pretty=1&fields=*' -d '{
- "query" : {
- "term" : {
- "multi.exact" : "box"
- }
- },
- "highlight" : {
- "fields" : {
- "multi" : {
- "number_of_fragments":0
- }
- }
- }
- }'
- # It feels like we should:
- # - Use a multi-field to analyze a single field in different manners
- # - Index only the subfields
- # - Store only the "main" subfield,
- # as it would result in storing multiple times the exact same data otherwise
- # - Query against one subfield (not the "main" subfield)
- # However with highlighting we would like to
- # - highlight against the multi-field (ie the "main" subfield)
- # - group highlights of searches against all the subfields
- # (I agree that not everybody may wish for this, though)
- # A solution would consist of either:
- # - Accessing the "main" field's value (either stored or from source)
- # when trying to access the subfield's value.
- # - Not using multi-fields and using special analyzers that would
- # multiplex the terms from multiple sub-analyzers
- # Implementing this last solution may be beneficial also for:
- # - Text search where you may wish to index exact, normalized and stemmed
- # terms in the same field index.
- # - Group analyzed terms from multiple fields into a kind of _all_terms field
- # which would leverage differentiated analyzers and the speed of searching
- # against a single field index.