Advertisement
Guest User

For Andrew

a guest
Mar 19th, 2018
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.89 KB | None | 0 0
  1. #
  2. # Minimal Sphinx configuration sample (clean, simple, functional)
  3. #
  4.  
  5. #############################################################################
  6. ## indexer settings
  7. #############################################################################
  8.  
  9. indexer
  10.  
  11. {
  12. # Settings for the offline indexer tool.
  13. mem_limit = 64M # indexing RAM usage limit
  14.  
  15. }
  16.  
  17. #############################################################################
  18. ## searchd settings
  19. #############################################################################
  20.  
  21. searchd
  22.  
  23. {
  24. # Search daemon settings. Two listeners: 9312 (no protocol suffix) and 9306 speaking the MySQL 4.1 wire protocol.
  25. listen = 9312
  26. listen = 9306:mysql41
  27. log = /var/log/sphinxsearch/searchd.log
  28. query_log = /var/log/sphinxsearch/query.log
  29. read_timeout = 5
  30. client_timeout = 86400
  31. max_children = 60
  32. pid_file = /var/run/sphinxsearch/searchd.pid
  33. seamless_rotate = 1
  34. preopen_indexes = 1
  35. unlink_old = 1
  36. workers = threads # for RT to work
  37. binlog_path = /var/lib/sphinxsearch
  38. binlog_max_log_size = 2000M
  39. rt_flush_period = 600
  40. binlog_flush = 0
  41. # read_timeout, client_timeout and rt_flush_period are in seconds (client_timeout = 86400 is 24 hours).
  42. }
  43.  
  44. #############################################################################
  45. ## data source definition
  46. #############################################################################
  47.  
  48. source mysql
  49.  
  50. {
  51. # Shared MySQL connection settings; the SQL sources declared with ": mysql" inherit from this block.
  52. type = mysql
  53. sql_host = 127.0.0.1
  54. sql_user = dbuser
  55. sql_pass = dbuser
  56. sql_db = influencing
  57. sql_port = 3306 # optional, default is 3306
  58. sql_range_step = 1000
  59. sql_query_pre = SET NAMES utf8
  60. # NOTE: sql_query_pre is multi-value; a child source that declares its own sql_query_pre discards this inherited one.
  61. }
  62.  
  63. source issueMedia : mysql
  64.  
  65. {
  66. sql_query_pre = SET NAMES utf8 # restated: declaring any sql_query_pre in a child discards the inherited ones
  67. sql_query = SELECT \
  68. `issue_media`.`id`, \
  69. `issue_media`.`recognized_text` AS `content`, \
  70. `issue_media`.`issue_id` AS `issue_id`, \
  71. `issue`.`outlet_id` AS `outlet_id`, \
  72. UNIX_TIMESTAMP(`issue`.`created_at`) AS `date_added` \
  73. FROM \
  74. `issue_media` \
  75. JOIN \
  76. `issue` ON `issue_media`.`issue_id` = `issue`.`id` \
  77. WHERE \
  78. `issue_media`.`id` >= $start AND \
  79. `issue_media`.`id` <= $end AND \
  80. `issue_media`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'issue_media' ) AND \
  81. `issue_media`.`is_processed` = 1 AND \
  82. `issue_media`.`recognized_text` IS NOT NULL AND \
  83. DATE(`issue`.`created_at`) >= DATE_ADD(CURDATE(), INTERVAL -180 DAY)
  84. sql_attr_uint = outlet_id
  85. sql_attr_uint = issue_id
  86. sql_attr_timestamp = date_added
  87. sql_query_range = SELECT MIN(id), MAX(id) FROM `issue_media`
  88. sql_query_pre = REPLACE INTO `sph_counter` SELECT 'issue_media', MAX(id) FROM `issue_media`
  89. # Main source: stores MAX(id) in sph_counter, then indexes processed rows up to that id whose issue is at most 180 days old.
  90. }
  91.  
  92. source issueMediaDelta : issueMedia
  93.  
  94. {
  95. sql_query_pre = SET NAMES utf8 # overriding sql_query_pre drops the inherited sph_counter REPLACE, so a delta run does not move the counter
  96. sql_query = SELECT \
  97. `issue_media`.`id`, \
  98. `issue_media`.`recognized_text` AS `content`, \
  99. `issue_media`.`issue_id` AS `issue_id`, \
  100. `issue`.`outlet_id` AS `outlet_id`, \
  101. UNIX_TIMESTAMP(`issue`.`created_at`) AS `date_added` \
  102. FROM `issue_media` \
  103. JOIN \
  104. `issue` ON `issue_media`.`issue_id` = `issue`.`id` \
  105. WHERE \
  106. `issue_media`.`id` >= $start AND \
  107. `issue_media`.`id` <= $end AND \
  108. `issue_media`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'issue_media' )
  109. # NOTE(review): unlike issueMedia, this query has no is_processed / recognized_text IS NOT NULL / 180-day filters - confirm that is intended.
  110. }
  111.  
  112. source story : mysql
  113.  
  114. {
  115. sql_query_pre = SET NAMES utf8 # restated: declaring any sql_query_pre in a child discards the inherited ones
  116. sql_query = SELECT \
  117. `story`.`id`, \
  118. `story`.`title`, \
  119. `story`.`created_at` AS `date_added`, \
  120. `story`.`body` AS `content`, \
  121. CONCAT("6", `story`.`id`) AS `tid` \
  122. FROM `story` \
  123. WHERE \
  124. `story`.`id` >= $start AND \
  125. `story`.`id` <= $end AND \
  126. `story`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'story' )
  127. sql_attr_uint = tid # NOTE(review): "6" + id stops fitting a 32-bit uint once id reaches 9 digits - confirm id range
  128. sql_attr_timestamp = date_added # NOTE(review): issueMedia wraps created_at in UNIX_TIMESTAMP(); confirm story.created_at is already an integer timestamp
  129. sql_query_range = SELECT MIN(id), MAX(id) FROM `story`
  130. sql_query_pre = REPLACE INTO `sph_counter` SELECT 'story', MAX(id) FROM `story`
  131. # Main source: stores MAX(id) in sph_counter under 'story', then indexes stories up to that id.
  132. }
  133.  
  134. source storyDelta : story
  135.  
  136. {
  137. # Delta source: same columns as story, restricted to ids above the max_doc_id recorded for 'story' in sph_counter.
  138. sql_query_pre = SET NAMES utf8 # overriding sql_query_pre drops the inherited sph_counter REPLACE, so a delta run does not move the counter
  139. sql_query = SELECT \
  140. `story`.`id`, \
  141. `story`.`title`, \
  142. `story`.`created_at` AS `date_added`, \
  143. `story`.`body` AS `content`, \
  144. CONCAT("6", `story`.`id`) AS `tid` \
  145. FROM `story` \
  146. WHERE \
  147. `story`.`id` >= $start AND \
  148. `story`.`id` <= $end AND \
  149. `story`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'story' )
  150.  
  151. }
  152.  
  153. source release : mysql
  154.  
  155. {
  156. sql_query_pre = SET NAMES utf8 # restated: declaring any sql_query_pre in a child discards the inherited ones
  157. sql_query = SELECT \
  158. `press_release`.`id`, \
  159. `press_release`.`headline` AS `title`, \
  160. `press_release`.`published_at` AS `date_added`, \
  161. `press_release`.`summary` AS `content`, \
  162. `press_release`.`company_id` AS `company_id`, \
  163. CONCAT("8", `press_release`.`id`) AS `tid` \
  164. FROM `press_release` \
  165. WHERE \
  166. `press_release`.`id` >= $start AND \
  167. `press_release`.`id` <= $end AND \
  168. `press_release`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'press_release' )
  169. sql_attr_uint = tid
  170. sql_attr_uint = company_id
  171. sql_attr_timestamp = date_added
  172. sql_query_range = SELECT MIN(id), MAX(id) FROM `press_release`
  173. sql_query_pre = REPLACE INTO `sph_counter` SELECT 'press_release', MAX(id) FROM `press_release`
  174. # Main source: stores MAX(id) in sph_counter under 'press_release', then indexes press releases up to that id.
  175. }
  176.  
  177. source releaseDelta : release
  178.  
  179. {
  180. # Delta source: same columns as release, restricted to ids above the max_doc_id recorded for 'press_release'.
  181. sql_query_pre = SET NAMES utf8 # overriding sql_query_pre drops the inherited sph_counter REPLACE, so a delta run does not move the counter
  182. sql_query = SELECT \
  183. `press_release`.`id`, \
  184. `press_release`.`headline` AS `title`, \
  185. `press_release`.`published_at` AS `date_added`, \
  186. `press_release`.`summary` AS `content`, \
  187. `press_release`.`company_id` AS `company_id`, \
  188. CONCAT("8", `press_release`.`id`) AS `tid` \
  189. FROM `press_release` \
  190. WHERE \
  191. `press_release`.`id` >= $start AND \
  192. `press_release`.`id` <= $end AND \
  193. `press_release`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'press_release' )
  194.  
  195. }
  196.  
  197.  
  198.  
  199. source request : mysql
  200.  
  201. {
  202. sql_query_pre = SET NAMES utf8 # restated: declaring any sql_query_pre in a child discards the inherited ones
  203. sql_query = SELECT \
  204. `request`.`id`, \
  205. `request`.`subject` AS `title`, \
  206. `request`.`created_at` AS `date_added`, \
  207. `request`.`description` AS `content`, \
  208. `request`.`person_id` AS `person_id`, \
  209. `request`.`outlet_id` AS `outlet_id`, \
  210. CONCAT("5", `request`.`id`) AS `tid` \
  211. FROM `request` \
  212. WHERE \
  213. `request`.`id` >= $start AND \
  214. `request`.`id` <= $end AND \
  215. `request`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'request' )
  216. sql_attr_uint = tid
  217. sql_attr_uint = person_id
  218. sql_attr_uint = outlet_id
  219. sql_attr_timestamp = date_added
  220. sql_query_range = SELECT MIN(id), MAX(id) FROM `request`
  221. sql_query_pre = REPLACE INTO `sph_counter` SELECT 'request', MAX(id) FROM `request`
  222. # Main source: stores MAX(id) in sph_counter under 'request', then indexes requests up to that id.
  223. }
  224.  
  225. source requestDelta : request
  226.  
  227. {
  228. # Delta source: same columns as request, restricted to ids above the max_doc_id recorded for 'request'.
  229. sql_query_pre = SET NAMES utf8 # overriding sql_query_pre drops the inherited sph_counter REPLACE, so a delta run does not move the counter
  230. sql_query = SELECT \
  231. `request`.`id`, \
  232. `request`.`subject` AS `title`, \
  233. `request`.`created_at` AS `date_added`, \
  234. `request`.`description` AS `content`, \
  235. `request`.`person_id` AS `person_id`, \
  236. `request`.`outlet_id` AS `outlet_id`, \
  237. CONCAT("5", `request`.`id`) AS `tid` \
  238. FROM `request` \
  239. WHERE \
  240. `request`.`id` >= $start AND \
  241. `request`.`id` <= $end AND \
  242. `request`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'request' )
  243.  
  244. }
  245.  
  246.  
  247.  
  248. source contact : mysql
  249.  
  250. {
  251. sql_query_pre = SET NAMES utf8 # restated: declaring any sql_query_pre in a child discards the inherited ones
  252. sql_query = SELECT \
  253. `contact`.`id`, \
  254. `contact`.`title`, \
  255. `contact`.`aliases` AS `content`, \
  256. `contact`.`created_at` AS `date_added`, \
  257. CONCAT(SUBSTRING(`contact`.`type_id`, 3, 1), `contact`.`id`) AS `tid` \
  258. FROM contact \
  259. WHERE \
  260. `contact`.`id` >= $start AND \
  261. `contact`.`id` <= $end AND \
  262. `contact`.`private_object_id` IS NULL
  263. sql_attr_uint = tid
  264. sql_attr_timestamp = date_added
  265. sql_query_range = SELECT MIN(id), MAX(id) FROM `contact`
  266. sql_query_pre = REPLACE INTO `sph_counter` SELECT 'contact', MAX(id) FROM `contact`
  267. # Full source for contacts with private_object_id IS NULL. NOTE(review): no query in this file reads the 'contact' counter written above.
  268. }
  269.  
  270. source contactDelta : contact
  271.  
  272. {
  273. # Delta source: selects contacts by updated_at within the last 2 days instead of comparing ids against sph_counter.
  274. sql_query_pre = SET NAMES utf8 # overriding sql_query_pre drops the inherited sph_counter REPLACE
  275. sql_query = SELECT \
  276. `contact`.`id`, \
  277. `contact`.`title`, \
  278. `contact`.`aliases` AS `content`, \
  279. `contact`.`created_at` AS `date_added`, \
  280. CONCAT(SUBSTRING(`contact`.`type_id`, 3, 1), `contact`.`id`) AS `tid` \
  281. FROM contact \
  282. WHERE \
  283. `contact`.`id` >= $start AND \
  284. `contact`.`id` <= $end AND \
  285. `contact`.`private_object_id` IS NULL AND \
  286. `contact`.`updated_at` > ADDDATE(NOW(), INTERVAL -2 DAY)
  287.  
  288. }
  289.  
  290. source clip : mysql
  291.  
  292. {
  293. sql_query_pre = SET NAMES utf8 # restated: declaring any sql_query_pre in a child discards the inherited ones
  294. sql_query = SELECT \
  295. `clip`.`id`, \
  296. `clip`.`headline` AS `title`, \
  297. `clip`.`created_at` AS `date_added`, \
  298. `clip`.`synopsis` AS `content`, \
  299. `clip`.`outlet_id` AS `outlet_id`, \
  300. `clip`.`profile_id` AS `profile_id`, \
  301. GROUP_CONCAT(DISTINCT `clip_company`.`company_id`) AS `company_id`, \
  302. GROUP_CONCAT(DISTINCT `clip_journo`.`journo_id`) AS `journo_id` \
  303. FROM `clip` \
  304. LEFT JOIN `clip_company` ON (`clip`.`id` = `clip_company`.`clip_id`) \
  305. LEFT JOIN `clip_journo` ON (`clip`.`id` = `clip_journo`.`clip_id`) \
  306. WHERE \
  307. `clip`.`id` >= $start AND \
  308. `clip`.`id` <= $end AND \
  309. `clip`.`is_ignored` = 0 AND \
  310. `clip`.`is_archived` = 0 AND \
  311. `clip`.`published_at` > (UNIX_TIMESTAMP() - 86400 * 90) AND \
  312. `clip`.`id` <= ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'clip' ) \
  313. GROUP BY `clip`.`id`
  314. sql_attr_multi = uint company_id from field;
  315. sql_attr_uint = outlet_id
  316. sql_attr_uint = profile_id
  317. sql_attr_multi = uint journo_id from field;
  318. sql_attr_timestamp = date_added
  319. sql_query_range = SELECT MIN(id), MAX(id) FROM `clip`
  320. sql_query_pre = REPLACE INTO `sph_counter` SELECT 'clip', MAX(id) FROM `clip`
  321. # Main source: non-ignored, non-archived clips published in the last 90 days, up to the max_doc_id stored for 'clip'.
  322. }
  323.  
  324. source clipDelta : clip
  325.  
  326. {
  327. # Delta source: clips with id above the max_doc_id recorded for 'clip'; unlike clip it has no published_at (90-day) filter.
  328. sql_query_pre = SET NAMES utf8 # overriding sql_query_pre drops the inherited sph_counter REPLACE, so a delta run does not move the counter
  329. sql_query = SELECT \
  330. `clip`.`id`, \
  331. `clip`.`headline` AS `title`, \
  332. `clip`.`created_at` AS `date_added`, \
  333. `clip`.`synopsis` AS `content`, \
  334. `clip`.`outlet_id` AS `outlet_id`, \
  335. `clip`.`profile_id` AS `profile_id`, \
  336. GROUP_CONCAT(DISTINCT `clip_company`.`company_id`) AS `company_id`, \
  337. GROUP_CONCAT(DISTINCT `clip_journo`.`journo_id`) AS `journo_id` \
  338. FROM `clip` \
  339. LEFT JOIN `clip_company` ON (`clip`.`id` = `clip_company`.`clip_id`) \
  340. LEFT JOIN `clip_journo` ON (`clip`.`id` = `clip_journo`.`clip_id`) \
  341. WHERE \
  342. `clip`.`id` >= $start AND \
  343. `clip`.`id` <= $end AND \
  344. `clip`.`is_ignored` = 0 AND \
  345. `clip`.`is_archived` = 0 AND \
  346. `clip`.`id` > ( SELECT `max_doc_id` FROM `sph_counter` WHERE `counter_id` = 'clip' ) \
  347. GROUP BY `clip`.`id`
  348.  
  349. }
  350.  
  351. source releaseContent
  352.  
  353. {
  354. # xmlpipe2 source: documents are read from the output of the command below (argument: PressRelease).
  355. type = xmlpipe2
  356. xmlpipe_command = php /home/portal.influencing.com/batch/xmlpipe.php PressRelease
  357.  
  358. }
  359.  
  360. source releaseContentDelta
  361.  
  362. {
  363. # xmlpipe2 source: same script as releaseContent, invoked with the extra "delta" argument.
  364. type = xmlpipe2
  365. xmlpipe_command = php /home/portal.influencing.com/batch/xmlpipe.php PressRelease delta
  366.  
  367. }
  368.  
  369. #############################################################################
  370. ## index definition
  371. #############################################################################
  372.  
  373. index main
  374.  
  375. {
  376. # Shared text-processing defaults inherited by the ": main" indexes below; sets no source or path itself.
  377. docinfo = extern
  378. morphology = stem_en
  379. min_word_len = 2
  380. html_strip = 1
  381.  
  382. }
  383.  
  384. index issueMedia : main
  385.  
  386. {
  387. # Main index over source issueMedia; inherits docinfo, min_word_len and html_strip from main.
  388. source = issueMedia
  389. path = /sphinx/data/issueMedia
  390. morphology = stem_en # same value as inherited from main
  391.  
  392. }
  393.  
  394. index issueMediaDelta : issueMedia
  395.  
  396. {
  397. # Delta companion of index issueMedia: inherits its settings, overriding only source and path.
  398. source = issueMediaDelta
  399. path = /sphinx/data/issueMediaDelta
  400.  
  401. }
  402.  
  403. index story : main
  404.  
  405. {
  406. # Main index over source story. docinfo, morphology and html_strip repeat main's values; min_word_len is raised from 2 to 3.
  407. source = story
  408. path = /sphinx/data/story
  409. docinfo = extern
  410. morphology = stem_en
  411. min_word_len = 3
  412. html_strip = 1
  413.  
  414. }
  415.  
  416. index storyDelta : story
  417.  
  418. {
  419. # Delta companion of index story: inherits its settings, overriding only source and path.
  420. source = storyDelta
  421. path = /sphinx/data/storyDelta
  422.  
  423. }
  424.  
  425.  
  426.  
  427. index release : main
  428.  
  429. {
  430. # Main index over source release; all text-processing settings come from main.
  431. source = release
  432. path = /sphinx/data/release
  433.  
  434. }
  435.  
  436. index releaseDelta : main
  437.  
  438. {
  439. # Delta for index release. Inherits from main rather than release; release only sets source and path, so settings match.
  440. source = releaseDelta
  441. path = /sphinx/data/releaseDelta
  442.  
  443. }
  444.  
  445. index request : main
  446.  
  447. {
  448. # Main index over source request: turns off main's stemming and enables infix indexing (min_infix_len = 3).
  449. source = request
  450. path = /sphinx/data/request
  451. morphology = none
  452. min_prefix_len = 0
  453. min_infix_len = 3
  454.  
  455. }
  456.  
  457. index requestDelta : request
  458.  
  459. {
  460. # Delta companion of index request: inherits its settings, overriding only source and path.
  461. source = requestDelta
  462. path = /sphinx/data/requestDelta
  463.  
  464. }
  465.  
  466.  
  467.  
  468. index contact : main
  469.  
  470. {
  471. # Main index over source contact: turns off main's stemming and enables infix indexing (min_infix_len = 3).
  472. source = contact
  473. path = /sphinx/data/contact
  474. morphology = none
  475. min_prefix_len = 0
  476. min_infix_len = 3
  477.  
  478. }
  479.  
  480. index contactDelta : contact
  481.  
  482. {
  483. # Delta companion of index contact: inherits its settings, overriding only source and path.
  484. source = contactDelta
  485. path = /sphinx/data/contactDelta
  486.  
  487. }
  488.  
  489. index contactFuzzy : main
  490.  
  491. {
  492. # Second index over the same source as index contact, built with stem_en + soundex morphology instead of none.
  493. source = contact
  494. path = /sphinx/data/contactFuzzy
  495. morphology = stem_en, soundex
  496. min_prefix_len = 0
  497. min_infix_len = 3
  498.  
  499. }
  500.  
  501. index contactFuzzyDelta : contactFuzzy
  502.  
  503. {
  504. # Delta of contactFuzzy: inherits from contactFuzzy (not contact) so it uses morphology = stem_en, soundex like its main index.
  505. source = contactDelta
  506. path = /sphinx/data/contactFuzzyDelta
  507. # Rebuild this index after changing its parent so the on-disk data is tokenised with the new morphology.
  508. }
  509.  
  510.  
  511.  
  512. index clip : main
  513.  
  514. {
  515. # Main index over source clip; the charset_table folds A..Z to a..z and also treats _, & and - as word characters.
  516. source = clip
  517. path = /sphinx/data/clip
  518. index_exact_words = 1
  519. charset_table = 0..9, A..Z->a..z, _, a..z, &, -
  520.  
  521. }
  522.  
  523. index clipDelta : clip
  524.  
  525. {
  526. # Delta companion of index clip: inherits its settings, overriding only source and path.
  527. source = clipDelta
  528. path = /sphinx/data/clipDelta
  529.  
  530. }
  531.  
  532.  
  533.  
  534. index clipSensitive : clip
  535.  
  536. {
  537. # Variant of index clip (source inherited) whose charset_table lists A..Z without folding to a..z.
  538. path = /sphinx/data/clipSensitive
  539. charset_table = 0..9, A..Z, _, a..z, &, -
  540.  
  541. }
  542.  
  543. index clipSensitiveDelta : clipSensitive
  544.  
  545. {
  546. # Delta companion of index clipSensitive: inherits its settings, fed from source clipDelta.
  547. source = clipDelta
  548. path = /sphinx/data/clipSensitiveDelta
  549.  
  550. }
  551.  
  552.  
  553.  
  554. index releaseContent : main
  555.  
  556. {
  557. # Index over the xmlpipe2 source releaseContent; all text-processing settings come from main.
  558. source = releaseContent
  559. path = /sphinx/data/releaseContent
  560.  
  561. }
  562.  
  563. index releaseContentDelta : main
  564.  
  565. {
  566. # Index over the xmlpipe2 source releaseContentDelta; inherits from main, as index releaseContent does.
  567. source = releaseContentDelta
  568. path = /sphinx/data/releaseContentDelta
  569.  
  570. }
  571.  
  572.  
  573.  
  574.  
  575.  
  576.  
  577.  
  578. index clipContentNewRt
  579.  
  580. {
  581. # Real-time index (type = rt) with no parent, so its text-processing settings are declared here directly.
  582. type = rt
  583. path = /sphinx/data/clipContentNewRt
  584. rt_field = content
  585. rt_attr_uint = outlet_id
  586. rt_attr_uint = profile_id
  587. # rt_attr_uint = journo_id
  588. rt_attr_multi = journo_id
  589. rt_attr_timestamp = date_added
  590. rt_mem_limit = 1048M
  591. index_exact_words = 1
  592. min_word_len = 2
  593. morphology = stem_en
  594. html_strip = 1
  595. charset_table = 0..9, A..Z->a..z, _, a..z, &, -
  596. # journo_id is declared as a multi-value attribute; the single-value declaration above is left commented out.
  597. }
  598.  
  599. index clipContentNewRtSensitive : clipContentNewRt
  600.  
  601. {
  602. # Variant of clipContentNewRt with its own path and a charset_table that lists A..Z without folding to a..z.
  603. path = /sphinx/data/clipContentNewRtSensitive
  604. charset_table = 0..9, A..Z, _, a..z, &, -
  605.  
  606. }
  607.  
  608. index issueMediaRt
  609.  
  610. {
  611. # Real-time index (type = rt) with no parent: one full-text field (content) plus the attributes listed below.
  612. type = rt
  613. path = /sphinx/data/issueMediaRt
  614. rt_field = content
  615. rt_attr_uint = issue_media_id
  616. rt_attr_uint = prev_issue_media_id
  617. rt_attr_uint = issue_id
  618. rt_attr_timestamp = date_added
  619. rt_mem_limit = 1048M
  620. morphology = stem_en
  621. html_strip = 1
  622. charset_table = 0..9, A..Z->a..z, _, a..z, &, -
  623.  
  624. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement