Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Elasticsearch index settings: shard layout plus a custom analysis chain
# for mixed Russian/English text (n-gram indexing, word-level search).
#
# NOTE(review): `russian_morphology` and `english_morphology` are referenced
# below but not defined in this file -- presumably supplied by a morphology
# analysis plugin; confirm it is installed. `icu_transform` requires the ICU
# analysis plugin.
index:
  number_of_shards: 1
  number_of_replicas: 0  # not for production

  analysis:
    char_filter:
      # Fold the Cyrillic letter "yo" into "ye" before tokenizing, so both
      # spellings of a word produce the same tokens.
      ru:
        type: mapping
        mappings: ['Ё=>Е', 'ё=>е']

    analyzer:
      # Analyzer for tags; `tag_filter` keeps '#' and '@' as part of tokens.
      tag_analyzer:
        alias: [tag]
        type: custom
        tokenizer: nGram
        filter: [tag_filter, russian_morphology, english_morphology]

      # Index-time analyzer: n-gram tokens. Applies the same filter chain
      # and char filter as `default_search`; only the tokenizer differs.
      default_index:
        alias: [index_ru]
        type: custom
        tokenizer: nGram
        filter:
          - stopwords_ru
          - custom_word_delimiter
          - lowercase
          - russian_morphology
          - english_morphology
          - translit
        char_filter: [ru]

      # Search-time analyzer: standard (word-level) tokens.
      default_search:
        alias: [search_ru]
        type: custom
        tokenizer: standard
        filter:
          - stopwords_ru
          - custom_word_delimiter
          - lowercase
          - russian_morphology
          - english_morphology
          - translit
        char_filter: [ru]

    tokenizer:
      # Custom tokenizer named `nGram`: emits n-grams of 4 to 20 characters.
      nGram:
        type: nGram
        min_gram: 4
        max_gram: 20

    filter:
      # Transliterate to Latin script, then strip combining marks
      # (decompose -> remove non-spacing marks -> recompose).
      translit:
        type: icu_transform
        id: 'Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC'

      # Word delimiter that treats '#' and '@' as alphabetic characters
      # instead of as split points.
      tag_filter:
        type: word_delimiter
        type_table: ['# => ALPHA', '@ => ALPHA']

      # Built-in Russian stop-word list, matched case-insensitively.
      stopwords_ru:
        type: stop
        stopwords: _russian_
        ignore_case: true

      # Examples on options set to `false` show what enabling them would do.
      custom_word_delimiter:
        type: word_delimiter
        generate_word_parts: true
        generate_number_parts: true  # "500-42" ⇒ "500" "42"
        catenate_words: true         # "wi-fi" ⇒ "wifi"
        catenate_numbers: false      # if enabled: "500-42" ⇒ "50042"
        catenate_all: true           # "wi-fi-4000" ⇒ "wifi4000"
        split_on_case_change: true   # "PowerShot" ⇒ "Power" "Shot"
        preserve_original: true      # "500-42" ⇒ "500-42" "500" "42"
        split_on_numerics: false     # if enabled: "j2se" ⇒ "j" "2" "se"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement