Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #
- # Sphinx configuration for MediaWiki
- #
- # Based on examples by Paul Grinberg at http://www.mediawiki.org/wiki/Extension:SphinxSearch
- # and Hank at http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx
- # Modified by Svemir Brkic for http://www.newworldencyclopedia.org/
- #
- # Released under GNU General Public License (see http://www.fsf.org/licenses/gpl.html)
- #
- # Latest version available at http://www.mediawiki.org/wiki/Extension:SphinxSearch
- # Primary data source: reads every wiki page together with the text of its latest revision
- source src_wiki_main
- {
- # database connection - replace the placeholder values with your own
- type = mysql
- sql_host = localhost
- sql_db = database
- sql_user = user
- sql_pass = password
- # optional connection overrides, uncomment if needed
- #sql_port = 3306
- #sql_sock = /var/lib/mysql/mysql.sock
- # executed once before the main fetch query
- sql_query_pre = SET NAMES utf8
- # main document fetch query: one row per page, matched to its latest revision (rev_id=page_latest)
- # and to that revision's text row (old_id=rev_text_id)
- # IES_ is the table prefix used here - change the table names if your prefix differs
- sql_query = SELECT page_id, page_title, page_namespace, page_is_redirect, old_id, old_text FROM IES_page, IES_revision, IES_text WHERE rev_id=page_latest AND old_id=rev_text_id
- # per-document lookup used by the command-line search utility to display document information
- sql_query_info = SELECT page_title, page_namespace FROM IES_page WHERE page_id=$id
- # columns of sql_query that are stored as unsigned integer attributes
- sql_attr_uint = page_namespace
- sql_attr_uint = page_is_redirect
- sql_attr_uint = old_id
- # multi-value attribute "category", used for category filtering:
- # for each linking page (cl_from), the page_id of every namespace-14 page whose title equals cl_to
- sql_attr_multi = uint category from query; SELECT cl_from, page_id AS category FROM IES_categorylinks, IES_page WHERE page_title=cl_to AND page_namespace=14
- }
- # data source definition for the incremental (delta) index
- source src_wiki_incremental : src_wiki_main
- {
- # Fetch only the pages touched since the most recent full-index run.
- # The full index is assumed to run daily at 7 AM UTC; if yours runs at a different
- # time, change BOTH "INTERVAL 7 HOUR" and the '070000' suffix to match.
- # The cutoff is built from UTC_TIMESTAMP() rather than the server-local CURDATE(),
- # because the schedule above is expressed in UTC (MediaWiki timestamps are UTC - verify
- # for your installation). Subtracting the 7 hours before formatting means a run made
- # before 07:00 UTC uses yesterday's 07:00 cutoff instead of one that is still in the future.
- sql_query = SELECT page_id, page_title, page_namespace, page_is_redirect, old_id, old_text FROM IES_page, IES_revision, IES_text WHERE rev_id=page_latest AND old_id=rev_text_id AND page_touched>=DATE_FORMAT(UTC_TIMESTAMP() - INTERVAL 7 HOUR, '%Y%m%d070000')
- # all other parameters are copied from the parent source
- }
- # main index definition (full rebuild from src_wiki_main)
- index wiki_main
- {
- # which document source to index
- source = src_wiki_main
- # path and index file name without extension
- # you may need to change this path or create this folder
- path = /var/data/sphinx/wiki_main
- # docinfo (i.e. per-document attribute values) storage strategy
- docinfo = extern
- # morphology: English stemmer
- morphology = stem_en
- # stopwords file (disabled; uncomment and point at a file to enable)
- #stopwords = /var/data/sphinx/stopwords.txt
- # minimum word length
- min_word_len = 1
- # allow wildcard (*) searches
- min_infix_len = 1
- enable_star = 1
- # charset encoding type
- charset_type = utf-8
- # charset definition and case folding rules "table":
- # digits and ASCII letters (upper case folded to lower case), plus the Latin-1
- # accented letters U+C0..U+FF folded to their unaccented ASCII base letter.
- # U+D7 and U+F7 are deliberately absent from the table.
- # U+FF (y with diaeresis) folds to "y"; it was previously mapped to "s" by mistake,
- # so a full reindex is needed for the corrected mapping to take effect.
- charset_table = 0..9, A..Z->a..z, a..z, \
- U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, \
- U+C7->c, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, \
- U+CD->i, U+CE->i, U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, \
- U+D4->o, U+D5->o, U+D6->o, U+D8->o, U+D9->u, U+DA->u, U+DB->u, \
- U+DC->u, U+DD->y, U+DE->t, U+DF->s, \
- U+E0->a, U+E1->a, U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E6->a, \
- U+E7->c, U+E8->e, U+E9->e, U+EA->e, U+EB->e, U+EC->i, \
- U+ED->i, U+EE->i, U+EF->i, U+F0->d, U+F1->n, U+F2->o, U+F3->o, \
- U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u, U+FB->u, \
- U+FC->u, U+FD->y, U+FE->t, U+FF->y
- }
- # Incremental index: inherits every setting from wiki_main and overrides only the two below
- index wiki_incremental : wiki_main
- {
- # index the incremental source instead of the full one
- source = src_wiki_incremental
- # separate index files, so the main index files are not overwritten
- path = /var/data/sphinx/wiki_incremental
- }
- # settings for the indexer program
- indexer
- {
- # indexing memory limit, raised here from the 32M default to 64M
- mem_limit = 64M
- }
- # settings for the searchd search daemon
- searchd
- {
- # --- network ---
- # IP address and port the daemon binds to and accepts connections on
- listen = 127.0.0.1:9312
- # client read timeout, in seconds
- read_timeout = 5
- # --- files (create the folder or change these paths) ---
- # daemon run log
- log = /var/log/sphinx/searchd.log
- # log of every search query
- query_log = /var/log/sphinx/query.log
- # file that holds the searchd process ID
- pid_file = /var/log/sphinx/searchd.pid
- # --- limits ---
- # maximum number of children to fork
- max_children = 30
- # maximum number of matches the daemon will ever retrieve
- # from each index and serve to a client
- max_matches = 1000
- }
- # --eof--
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement