Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #
- # Minimal Sphinx configuration sample (clean, simple, functional)
- #
- #############################################################################
- ## indexer settings
- #############################################################################
- # Settings for the indexer tool; the only option set here is its memory limit.
- indexer
- {
- mem_limit = 64M
- }
- #############################################################################
- ## searchd settings
- #############################################################################
- # Settings for the searchd daemon: listeners, log and pid file locations,
- # timeouts, index rotation behaviour, and binlog / RT flushing.
- searchd
- {
- # Two listeners: port 9312 with no protocol given, and 9306 speaking the
- # MySQL wire protocol (mysql41).
- listen = 9312
- listen = 9306:mysql41
- log = /var/log/sphinxsearch/searchd.log
- query_log = /var/log/sphinxsearch/query.log
- # Timeouts are in seconds; client_timeout = 86400 is 24 hours.
- read_timeout = 5
- client_timeout = 86400
- max_children = 60
- pid_file = /var/run/sphinxsearch/searchd.pid
- # Index rotation options.
- seamless_rotate = 1
- preopen_indexes = 1
- unlink_old = 1
- workers = threads # for RT to work
- # Binary log location and size cap, and the RT flush period.
- binlog_path = /var/lib/sphinxsearch
- binlog_max_log_size = 2000M
- rt_flush_period = 600
- binlog_flush = 0
- }
- #############################################################################
- ## data source definition
- #############################################################################
- # Base MySQL source: connection settings shared by every source declared as
- # `: mysql`. It defines no sql_query of its own.
- # NOTE(review): per Sphinx inheritance rules a child that declares its own
- # sql_query_pre discards the one set here - confirm each child re-issues it.
- source mysql
- {
- type = mysql
- sql_host = 127.0.0.1
- sql_user = dbuser
- sql_pass = dbuser
- sql_db = influencing
- sql_port = 3306 # optional, default is 3306
- # Step used for the $start/$end ranges in the children's ranged queries.
- sql_range_step = 1000
- sql_query_pre = SET NAMES utf8
- }
- # Main (full) source for recognized issue-media text; inherits the connection
- # settings from `mysql`.
- # Before indexing it stores MAX(id) of `issue_media` in `sph_counter`, then
- # fetches only rows at or below that id; issueMediaDelta selects the rows above it.
- # Rows are limited to processed media with non-NULL text whose issue was
- # created within the last 180 days.
- source issueMedia : mysql
- {
- sql_query = SELECT \
- `issue_media`.`id`, \
- `issue_media`.`recognized_text` AS `content`, \
- `issue_media`.`issue_id` AS `issue_id`, \
- `issue`.`outlet_id` AS `outlet_id`, \
- UNIX_TIMESTAMP(`issue`.`created_at`) AS `date_added` \
- FROM \
- `issue_media` \
- JOIN \
- `issue` ON `issue_media`.`issue_id` = `issue`.`id` \
- WHERE \
- `issue_media`.`id` >= $start AND \
- `issue_media`.`id` <= $end AND \
- `issue_media`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'issue_media' ) AND \
- `issue_media`.`is_processed` = 1 AND \
- `issue_media`.`recognized_text` IS NOT NULL AND \
- DATE(`issue`.`created_at`) >= DATE_ADD(CURDATE(), INTERVAL -180 DAY)
- sql_attr_uint = outlet_id
- sql_attr_uint = issue_id
- sql_attr_timestamp = date_added
- sql_query_range = SELECT MIN(id), MAX(id) FROM `issue_media`
- # Declaring sql_query_pre in a child discards the list inherited from `mysql`,
- # so the connection charset has to be set again ahead of the counter update.
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO `sph_counter` SELECT 'issue_media', MAX(id) FROM `issue_media`
- }
- # Delta source for issue media: same columns as issueMedia, but only rows whose
- # id is above the value stored in `sph_counter` for 'issue_media'.
- # Unlike issueMedia, this query has no is_processed, recognized_text or
- # 180-day filters.
- source issueMediaDelta : issueMedia
- {
- # Overrides the inherited pre-query list so that only the charset statement
- # runs here (the parent's REPLACE INTO `sph_counter` is not repeated).
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT \
- `issue_media`.`id`, \
- `issue_media`.`recognized_text` AS `content`, \
- `issue_media`.`issue_id` AS `issue_id`, \
- `issue`.`outlet_id` AS `outlet_id`, \
- UNIX_TIMESTAMP(`issue`.`created_at`) AS `date_added` \
- FROM `issue_media` \
- JOIN \
- `issue` ON `issue_media`.`issue_id` = `issue`.`id` \
- WHERE \
- `issue_media`.`id` >= $start AND \
- `issue_media`.`id` <= $end AND \
- `issue_media`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'issue_media' )
- }
- # Main (full) source for stories; inherits the connection settings from `mysql`.
- # Before indexing it stores MAX(id) of `story` in `sph_counter`, then fetches
- # only rows at or below that id; storyDelta selects the rows above it.
- # `tid` is the digit "6" concatenated with the story id.
- # NOTE(review): `created_at` is passed as-is into a timestamp attribute, while
- # issueMedia wraps its date in UNIX_TIMESTAMP() - confirm the column already
- # holds an integer epoch. Also verify "6"+id always fits a 32-bit uint attribute.
- source story : mysql
- {
- sql_query = SELECT \
- `story`.`id`, \
- `story`.`title`, \
- `story`.`created_at` AS `date_added`, \
- `story`.`body` AS `content`, \
- CONCAT("6", `story`.`id`) AS `tid` \
- FROM `story` \
- WHERE \
- `story`.`id` >= $start AND \
- `story`.`id` <= $end AND \
- `story`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'story' )
- sql_attr_uint = tid
- sql_attr_timestamp = date_added
- sql_query_range = SELECT MIN(id), MAX(id) FROM `story`
- # Declaring sql_query_pre in a child discards the list inherited from `mysql`,
- # so the connection charset has to be set again ahead of the counter update.
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO `sph_counter` SELECT 'story', MAX(id) FROM `story`
- }
- # Delta source for stories: same columns as story, but only rows whose id is
- # above the value stored in `sph_counter` for 'story'.
- source storyDelta : story
- {
- # Overrides the inherited pre-query list so that only the charset statement
- # runs here (the parent's REPLACE INTO `sph_counter` is not repeated).
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT \
- `story`.`id`, \
- `story`.`title`, \
- `story`.`created_at` AS `date_added`, \
- `story`.`body` AS `content`, \
- CONCAT("6", `story`.`id`) AS `tid` \
- FROM `story` \
- WHERE \
- `story`.`id` >= $start AND \
- `story`.`id` <= $end AND \
- `story`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'story' )
- }
- # Main (full) source for press releases (headline + summary); inherits the
- # connection settings from `mysql`.
- # Before indexing it stores MAX(id) of `press_release` in `sph_counter`, then
- # fetches only rows at or below that id; releaseDelta selects the rows above it.
- # `tid` is the digit "8" concatenated with the press release id.
- # NOTE(review): `published_at` is passed as-is into a timestamp attribute -
- # confirm the column already holds an integer epoch. Also verify "8"+id always
- # fits a 32-bit uint attribute.
- source release : mysql
- {
- sql_query = SELECT \
- `press_release`.`id`, \
- `press_release`.`headline` AS `title`, \
- `press_release`.`published_at` AS `date_added`, \
- `press_release`.`summary` AS `content`, \
- `press_release`.`company_id` AS `company_id`, \
- CONCAT("8", `press_release`.`id`) AS `tid` \
- FROM `press_release` \
- WHERE \
- `press_release`.`id` >= $start AND \
- `press_release`.`id` <= $end AND \
- `press_release`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'press_release' )
- sql_attr_uint = tid
- sql_attr_uint = company_id
- sql_attr_timestamp = date_added
- sql_query_range = SELECT MIN(id), MAX(id) FROM `press_release`
- # Declaring sql_query_pre in a child discards the list inherited from `mysql`,
- # so the connection charset has to be set again ahead of the counter update.
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO `sph_counter` SELECT 'press_release', MAX(id) FROM `press_release`
- }
- # Delta source for press releases: same columns as release, but only rows whose
- # id is above the value stored in `sph_counter` for 'press_release'.
- source releaseDelta : release
- {
- # Overrides the inherited pre-query list so that only the charset statement
- # runs here (the parent's REPLACE INTO `sph_counter` is not repeated).
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT \
- `press_release`.`id`, \
- `press_release`.`headline` AS `title`, \
- `press_release`.`published_at` AS `date_added`, \
- `press_release`.`summary` AS `content`, \
- `press_release`.`company_id` AS `company_id`, \
- CONCAT("8", `press_release`.`id`) AS `tid` \
- FROM `press_release` \
- WHERE \
- `press_release`.`id` >= $start AND \
- `press_release`.`id` <= $end AND \
- `press_release`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'press_release' )
- }
- # Main (full) source for requests (subject + description); inherits the
- # connection settings from `mysql`.
- # Before indexing it stores MAX(id) of `request` in `sph_counter`, then fetches
- # only rows at or below that id; requestDelta selects the rows above it.
- # `tid` is the digit "5" concatenated with the request id.
- # NOTE(review): `created_at` is passed as-is into a timestamp attribute -
- # confirm the column already holds an integer epoch. Also verify "5"+id always
- # fits a 32-bit uint attribute.
- source request : mysql
- {
- sql_query = SELECT \
- `request`.`id`, \
- `request`.`subject` AS `title`, \
- `request`.`created_at` AS `date_added`, \
- `request`.`description` AS `content`, \
- `request`.`person_id` AS `person_id`, \
- `request`.`outlet_id` AS `outlet_id`, \
- CONCAT("5", `request`.`id`) AS `tid` \
- FROM `request` \
- WHERE \
- `request`.`id` >= $start AND \
- `request`.`id` <= $end AND \
- `request`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'request' )
- sql_attr_uint = tid
- sql_attr_uint = person_id
- sql_attr_uint = outlet_id
- sql_attr_timestamp = date_added
- sql_query_range = SELECT MIN(id), MAX(id) FROM `request`
- # Declaring sql_query_pre in a child discards the list inherited from `mysql`,
- # so the connection charset has to be set again ahead of the counter update.
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO `sph_counter` SELECT 'request', MAX(id) FROM `request`
- }
- # Delta source for requests: same columns as request, but only rows whose id is
- # above the value stored in `sph_counter` for 'request'.
- source requestDelta : request
- {
- # Overrides the inherited pre-query list so that only the charset statement
- # runs here (the parent's REPLACE INTO `sph_counter` is not repeated).
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT \
- `request`.`id`, \
- `request`.`subject` AS `title`, \
- `request`.`created_at` AS `date_added`, \
- `request`.`description` AS `content`, \
- `request`.`person_id` AS `person_id`, \
- `request`.`outlet_id` AS `outlet_id`, \
- CONCAT("5", `request`.`id`) AS `tid` \
- FROM `request` \
- WHERE \
- `request`.`id` >= $start AND \
- `request`.`id` <= $end AND \
- `request`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'request' )
- }
- # Main (full) source for contacts (title + aliases); inherits the connection
- # settings from `mysql`. Only rows with a NULL private_object_id are indexed.
- # `tid` is the third character of `type_id` concatenated with the contact id.
- # NOTE(review): the pre-query stores MAX(id) under 'contact' in `sph_counter`,
- # but neither this query nor contactDelta reads that row (the delta is
- # time-based) - confirm whether the counter is still needed.
- # NOTE(review): `created_at` is passed as-is into a timestamp attribute -
- # confirm the column already holds an integer epoch.
- source contact : mysql
- {
- sql_query = SELECT \
- `contact`.`id`, \
- `contact`.`title`, \
- `contact`.`aliases` AS `content`, \
- `contact`.`created_at` AS `date_added`, \
- CONCAT(SUBSTRING(`contact`.`type_id`, 3, 1), `contact`.`id`) AS `tid` \
- FROM contact \
- WHERE \
- `contact`.`id` >= $start AND \
- `contact`.`id` <= $end AND \
- `contact`.`private_object_id` IS NULL
- sql_attr_uint = tid
- sql_attr_timestamp = date_added
- sql_query_range = SELECT MIN(id), MAX(id) FROM `contact`
- # Declaring sql_query_pre in a child discards the list inherited from `mysql`,
- # so the connection charset has to be set again ahead of the counter update.
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO `sph_counter` SELECT 'contact', MAX(id) FROM `contact`
- }
- # Delta source for contacts: same columns and private_object_id filter as
- # contact, restricted to rows whose `updated_at` falls within the last 2 days.
- # Unlike the other delta sources it does not consult `sph_counter`.
- source contactDelta : contact
- {
- # Overrides the inherited pre-query list so that only the charset statement
- # runs here (the parent's REPLACE INTO `sph_counter` is not repeated).
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT \
- `contact`.`id`, \
- `contact`.`title`, \
- `contact`.`aliases` AS `content`, \
- `contact`.`created_at` AS `date_added`, \
- CONCAT(SUBSTRING(`contact`.`type_id`, 3, 1), `contact`.`id`) AS `tid` \
- FROM contact \
- WHERE \
- `contact`.`id` >= $start AND \
- `contact`.`id` <= $end AND \
- `contact`.`private_object_id` IS NULL AND \
- `contact`.`updated_at` > ADDDATE(NOW(), INTERVAL -2 DAY)
- }
- # Main (full) source for clips (headline + synopsis); inherits the connection
- # settings from `mysql`.
- # Company and journalist ids are collected per clip with GROUP_CONCAT over the
- # LEFT JOINed link tables and exposed as multi-value attributes parsed from
- # those fields.
- # Only clips that are neither ignored nor archived, published within the last
- # 90 days (86400 * 90 seconds), and at or below the stored counter id are
- # indexed; clipDelta selects the rows above the counter.
- # NOTE(review): `created_at` is passed as-is into a timestamp attribute -
- # confirm the column already holds an integer epoch.
- source clip : mysql
- {
- sql_query = SELECT \
- `clip`.`id`, \
- `clip`.`headline` AS `title`, \
- `clip`.`created_at` AS `date_added`, \
- `clip`.`synopsis` AS `content`, \
- `clip`.`outlet_id` AS `outlet_id`, \
- `clip`.`profile_id` AS `profile_id`, \
- GROUP_CONCAT(DISTINCT `clip_company`.`company_id`) AS `company_id`, \
- GROUP_CONCAT(DISTINCT `clip_journo`.`journo_id`) AS `journo_id` \
- FROM `clip` \
- LEFT JOIN `clip_company` ON (`clip`.`id` = `clip_company`.`clip_id`) \
- LEFT JOIN `clip_journo` ON (`clip`.`id` = `clip_journo`.`clip_id`) \
- WHERE \
- `clip`.`id` >= $start AND \
- `clip`.`id` <= $end AND \
- `clip`.`is_ignored` = 0 AND \
- `clip`.`is_archived` = 0 AND \
- `clip`.`published_at` > (UNIX_TIMESTAMP() - 86400 * 90) AND \
- `clip`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'clip' ) \
- GROUP BY `clip`.`id`
- sql_attr_multi = uint company_id from field;
- sql_attr_uint = outlet_id
- sql_attr_uint = profile_id
- sql_attr_multi = uint journo_id from field;
- sql_attr_timestamp = date_added
- sql_query_range = SELECT MIN(id), MAX(id) FROM `clip`
- # Declaring sql_query_pre in a child discards the list inherited from `mysql`,
- # so the connection charset has to be set again ahead of the counter update.
- sql_query_pre = SET NAMES utf8
- sql_query_pre = REPLACE INTO `sph_counter` SELECT 'clip', MAX(id) FROM `clip`
- }
- # Delta source for clips: same columns and joins as clip, but only rows whose
- # id is above the value stored in `sph_counter` for 'clip'.
- # Unlike clip, this query has no 90-day `published_at` filter.
- source clipDelta : clip
- {
- # Overrides the inherited pre-query list so that only the charset statement
- # runs here (the parent's REPLACE INTO `sph_counter` is not repeated).
- sql_query_pre = SET NAMES utf8
- sql_query = SELECT \
- `clip`.`id`, \
- `clip`.`headline` AS `title`, \
- `clip`.`created_at` AS `date_added`, \
- `clip`.`synopsis` AS `content`, \
- `clip`.`outlet_id` AS `outlet_id`, \
- `clip`.`profile_id` AS `profile_id`, \
- GROUP_CONCAT(DISTINCT `clip_company`.`company_id`) AS `company_id`, \
- GROUP_CONCAT(DISTINCT `clip_journo`.`journo_id`) AS `journo_id` \
- FROM `clip` \
- LEFT JOIN `clip_company` ON (`clip`.`id` = `clip_company`.`clip_id`) \
- LEFT JOIN `clip_journo` ON (`clip`.`id` = `clip_journo`.`clip_id`) \
- WHERE \
- `clip`.`id` >= $start AND \
- `clip`.`id` <= $end AND \
- `clip`.`is_ignored` = 0 AND \
- `clip`.`is_archived` = 0 AND \
- `clip`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'clip' ) \
- GROUP BY `clip`.`id`
- }
- # xmlpipe2 source: documents are read from the output of the PHP batch script
- # below, invoked with the single argument `PressRelease`.
- source releaseContent
- {
- type = xmlpipe2
- xmlpipe_command = php /home/portal.influencing.com/batch/xmlpipe.php PressRelease
- }
- # xmlpipe2 source for the delta: runs the same PHP batch script as
- # releaseContent with an extra `delta` argument.
- source releaseContentDelta
- {
- type = xmlpipe2
- xmlpipe_command = php /home/portal.influencing.com/batch/xmlpipe.php PressRelease delta
- }
- #############################################################################
- ## index definition
- #############################################################################
- # Base index settings inherited by the `: main` indexes below. It declares no
- # source or path of its own.
- index main
- {
- docinfo = extern
- morphology = stem_en
- min_word_len = 2
- html_strip = 1
- }
- # Disk index built from the issueMedia source. The morphology line repeats the
- # value already inherited from `main`.
- index issueMedia : main
- {
- source = issueMedia
- path = /sphinx/data/issueMedia
- morphology = stem_en
- }
- # Delta index for issueMedia: inherits its settings and overrides only the
- # source and the path.
- index issueMediaDelta : issueMedia
- {
- source = issueMediaDelta
- path = /sphinx/data/issueMediaDelta
- }
- # Disk index built from the story source. docinfo, morphology and html_strip
- # repeat the values inherited from `main`; min_word_len is raised from 2 to 3.
- index story : main
- {
- source = story
- path = /sphinx/data/story
- docinfo = extern
- morphology = stem_en
- min_word_len = 3
- html_strip = 1
- }
- # Delta index for story: inherits its settings and overrides only the source
- # and the path.
- index storyDelta : story
- {
- source = storyDelta
- path = /sphinx/data/storyDelta
- }
- # Disk index built from the release source, using the settings of `main` unchanged.
- index release : main
- {
- source = release
- path = /sphinx/data/release
- }
- # Delta index for release. It inherits from `main` directly rather than from
- # `release`; since `release` adds nothing beyond source and path, the
- # resulting settings are the same.
- index releaseDelta : main
- {
- source = releaseDelta
- path = /sphinx/data/releaseDelta
- }
- # Disk index built from the request source. Overrides `main` by turning
- # morphology off and setting a minimum infix length of 3 (prefix length 0).
- index request : main
- {
- source = request
- path = /sphinx/data/request
- morphology = none
- min_prefix_len = 0
- min_infix_len = 3
- }
- # Delta index for request: inherits its settings and overrides only the source
- # and the path.
- index requestDelta : request
- {
- source = requestDelta
- path = /sphinx/data/requestDelta
- }
- # Disk index built from the contact source. Overrides `main` by turning
- # morphology off and setting a minimum infix length of 3 (prefix length 0).
- index contact : main
- {
- source = contact
- path = /sphinx/data/contact
- morphology = none
- min_prefix_len = 0
- min_infix_len = 3
- }
- # Delta index for contact: inherits its settings and overrides only the source
- # and the path.
- index contactDelta : contact
- {
- source = contactDelta
- path = /sphinx/data/contactDelta
- }
- # Second index over the same `contact` source as index `contact`, stored at
- # its own path and using stem_en + soundex morphology instead of none.
- index contactFuzzy : main
- {
- source = contact
- path = /sphinx/data/contactFuzzy
- morphology = stem_en, soundex
- min_prefix_len = 0
- min_infix_len = 3
- }
- # Delta index for contactFuzzy. It inherits from contactFuzzy (not contact) so
- # the delta uses the same stem_en + soundex morphology and infix settings as
- # the main fuzzy index; only the source and the path are overridden.
- index contactFuzzyDelta : contactFuzzy
- {
- source = contactDelta
- path = /sphinx/data/contactFuzzyDelta
- }
- # Disk index built from the clip source. Adds exact-word indexing on top of
- # `main`, with a charset_table that maps A..Z to a..z and also lists digits,
- # `_`, `&` and `-`.
- index clip : main
- {
- source = clip
- path = /sphinx/data/clip
- index_exact_words = 1
- charset_table = 0..9, A..Z->a..z, _, a..z, &, -
- }
- # Delta index for clip: inherits its settings and overrides only the source
- # and the path.
- index clipDelta : clip
- {
- source = clipDelta
- path = /sphinx/data/clipDelta
- }
- # Variant of index `clip` (same inherited source) stored at its own path.
- # Its charset_table lists A..Z without the ->a..z mapping used by `clip`.
- index clipSensitive : clip
- {
- path = /sphinx/data/clipSensitive
- charset_table = 0..9, A..Z, _, a..z, &, -
- }
- # Delta index for clipSensitive: inherits its settings and overrides only the
- # source (clipDelta) and the path.
- index clipSensitiveDelta : clipSensitive
- {
- source = clipDelta
- path = /sphinx/data/clipSensitiveDelta
- }
- # Disk index built from the xmlpipe2 releaseContent source, using the settings
- # of `main` unchanged.
- index releaseContent : main
- {
- source = releaseContent
- path = /sphinx/data/releaseContent
- }
- # Delta index built from the xmlpipe2 releaseContentDelta source. It inherits
- # from `main` directly, like releaseContent.
- index releaseContentDelta : main
- {
- source = releaseContentDelta
- path = /sphinx/data/releaseContentDelta
- }
- # Real-time (RT) index for clip content. It does not inherit from `main`, so
- # the text-processing options are set explicitly at the bottom of the block.
- index clipContentNewRt
- {
- type = rt
- path = /sphinx/data/clipContentNewRt
- # Schema: one full-text field plus the attributes below.
- rt_field = content
- rt_attr_uint = outlet_id
- rt_attr_uint = profile_id
- # journo_id is declared as a multi-value attribute; the single-value
- # declaration is left commented out.
- # rt_attr_uint = journo_id
- rt_attr_multi = journo_id
- rt_attr_timestamp = date_added
- rt_mem_limit = 1048M
- # Text-processing options; the values match those of disk index `clip`.
- index_exact_words = 1
- min_word_len = 2
- morphology = stem_en
- html_strip = 1
- charset_table = 0..9, A..Z->a..z, _, a..z, &, -
- }
- # Variant of clipContentNewRt (inherits its type, schema and options) stored
- # at its own path. Its charset_table lists A..Z without the ->a..z mapping.
- index clipContentNewRtSensitive : clipContentNewRt
- {
- path = /sphinx/data/clipContentNewRtSensitive
- charset_table = 0..9, A..Z, _, a..z, &, -
- }
- # Real-time (RT) index for issue media text. It does not inherit from `main`,
- # so the text-processing options are set explicitly at the bottom of the block.
- index issueMediaRt
- {
- type = rt
- path = /sphinx/data/issueMediaRt
- # Schema: one full-text field plus the attributes below.
- rt_field = content
- rt_attr_uint = issue_media_id
- rt_attr_uint = prev_issue_media_id
- rt_attr_uint = issue_id
- rt_attr_timestamp = date_added
- rt_mem_limit = 1048M
- # Text-processing options; min_word_len and index_exact_words are not set here.
- morphology = stem_en
- html_strip = 1
- charset_table = 0..9, A..Z->a..z, _, a..z, &, -
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement