Advertisement
Guest User

Sphinx

a guest
Oct 23rd, 2012
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 11.32 KB | None | 0 0
  1. <?php
  2.  
  3. /**
  4.  * This file copyright (C) 2010 Barry Hunter (sphinx@barryhunter.co.uk)
  5.  *
  6.  * This program is free software; you can redistribute it and/or
  7.  * modify it under the terms of the GNU General Public License
  8.  * as published by the Free Software Foundation; either version 2
  9.  * of the License, or (at your option) any later version.
  10.  *
  11.  * This program is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14.  * GNU General Public License for more details.
  15.  *
  16.  * You should have received a copy of the GNU General Public License
  17.  * along with this program; if not, write to the Free Software
  18.  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19.  */
  20.  
  21. # Version 0.1 - First release (very basic and not fully functional)
  22. # Version 0.2 - Made the Configuration section usable
  23. # Version 0.3 - (withdrawn - had broken implementation)
  24. # Version 0.4 - Added support for highlighted excerpts/snippets body in the results
  25. # Version 0.5 - support for paging! (and configurable page size)
  26.  
  27. # See a running demo of THIS code: http://www.nearby.org.uk/sphinx/example5.php?q=test
  28. #  nothing changed, other than hooking it up with a sphinx index, and mysql database.
  29.  
  ######################
  # Change these settings to match your setup...

  $CONF = array();

  $CONF['sphinx_host'] = 'localhost';
  $CONF['sphinx_port'] = 9312; //this demo uses the SphinxAPI interface
  $CONF['sphinx_index'] = "*"; // can also be a list of indexes, "main, delta"

  #MySQL connection details - read by the search code below when it fetches the
  # title/body of each matching document. These are PLACEHOLDERS, replace with real values.
  $CONF['mysql_host'] = 'localhost';
  $CONF['mysql_username'] = 'username';
  $CONF['mysql_password'] = 'password';
  $CONF['mysql_database'] = 'database';

  #can use 'excerpt' to highlight using the query, or 'asis' to show description as is.
  $CONF['body'] = 'excerpt';

  #the link for the title (only the $id placeholder is supported)
  $CONF['link_format'] = '/page.php?page_id=$id';

  #Change this to FALSE on a live site!
  $CONF['debug'] = TRUE;

  #How many results per page
  $CONF['page_size'] = 25;

  #maximum number of results - should match sphinx's max_matches. default 1000
  $CONF['max_matches'] = 1000;


  ######################
  #mysql query to fetch results, needs `id`, `title` and `body` columns in the final result.
  #$ids is replaced by the list of ids (the string is single-quoted so $ids stays a literal placeholder)
  #this query can be as arbitrarily complex as required - but mysql has to be able to run it quickly

  #DO NOT include an order by (but if you use GROUP BY, put ORDER BY NULL) - the order of the results doesn't matter

  #TIP can also do :: CONCAT(description,' Category:',category) AS body :: for example

  #The WHERE ... IN ($ids) clause restricts the fetch to the documents Sphinx matched;
  # without it every row of the table would be read on each search.
  $CONF['mysql_query'] = '
  SELECT ID as id,Title AS title, Description AS body
  FROM articles
  WHERE ID IN ($ids)
  ';

  #might need to put in path to your file
  #(only loaded when a query was submitted, since nothing else needs the Sphinx API)
  if (!empty($_GET['q'])) require("api/sphinxapi.php");
  71.  
  72. ######################
  73. # change the look and feel
  74.  
  75. ?>
  <style type="text/css">
  /* Search form container */
  form#search {
      background-color:silver;
      padding:10px;
  }
  /* Result list */
  ul.results {
      border:1px solid silver;
  }
  .results li {
      font-size:0.9em;
  }
  .results li a {
      font-weight:bold;
      font-size:1.2em;
  }

  /* Pager: links to other pages */
  .pages a {
      color:brown;
      text-decoration: none;
      padding:4px;
      margin:2px;
      border:1px solid silver;
      background-color:#eeeeee;
  }
  /* Pager: the current page */
  .pages b {
      padding:4px;
      margin:2px;
      background-color:#eeeeee;
  }

  </style>
  107. <?php
  108.  
  109. ##################################################################
  110. ##################################################################
  111. #
  112. # Nothing below should need changing - should work as is
  113. #  but of course this is only a basic demo, can customise it to your needs
  114. #
  115.  
  116.  
  117.  
  //Sanitise the input (only plain strings are accepted; array-style q[]=... input is ignored)
  $q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';

  //An upper-case " OR " is accepted as an alias for the | operator
  $q = preg_replace('/ OR /',' | ',$q);

  //Lower-case the query and collapse every run of characters that is not a word
  // character or one of the supported operators into a single space
  $q = trim(preg_replace('/[^\w~\|\(\)"\/=-]+/',' ',trim(strtolower($q))));

  //Display the HTML search form
  // (full "<?php" tag: the short "<?" form is not parsed when short_open_tag is off)
  ?>
    <form action="sphinx-test.php" method="get" id="search">
        Search: <input name="q" type="text" value="<?php echo htmlentities($q); ?>"/>
        <input type="submit" value="Search"/>
    </form>
  <?php

  //If the user entered something
  if (!empty($q)) {
      //produce a version for display
      $qo = (strlen($q) > 64) ? '--complex query--' : $q;

      //Choose an appropriate mode (depending on the query)
      $mode = SPH_MATCH_ALL;
      if (strpos($q,'~') === 0) {
          //a leading ~ is a marker, not part of the query itself
          $q = preg_replace('/^\~/','',$q);
          if (substr_count($q,' ') > 1) //over 2 words
              $mode = SPH_MATCH_ANY;
      } elseif (preg_match('/[\|\(\)"\/=-]/',$q)) {
          $mode = SPH_MATCH_EXTENDED;
      }

      //setup paging...
      if (!empty($_GET['page'])) {
          $currentPage = intval($_GET['page']);
          if (empty($currentPage) || $currentPage < 1) {$currentPage = 1;}

          $currentOffset = ($currentPage -1)* $CONF['page_size'];

          if ($currentOffset > ($CONF['max_matches']-$CONF['page_size']) ) {
              die("Only the first {$CONF['max_matches']} results accessible");
          }
      } else {
          $currentPage = 1;
          $currentOffset = 0;
      }

      //Connect to sphinx, and run the query
      $cl = new SphinxClient();
      $cl->SetServer($CONF['sphinx_host'], $CONF['sphinx_port']);
      $cl->SetSortMode(SPH_SORT_EXTENDED, "@relevance DESC, @id DESC");
      $cl->SetMatchMode($mode);
      $cl->SetLimits($currentOffset,$CONF['page_size']); //current page and number of results

      $res = $cl->Query($q, $CONF['sphinx_index']);

      //Check for failure
      if (empty($res)) {
          print "Query failed: -- please try again later.\n";
          if ($CONF['debug'] && $cl->GetLastError())
              print "<br/>Error: ".$cl->GetLastError()."\n\n";
          return;
      }

      if ($CONF['debug'] && $cl->GetLastWarning())
          print "<br/>WARNING: ".$cl->GetLastWarning()."\n\n";

      //'matches' may be missing from the result when nothing matched, so normalise it to an array
      $matches = (isset($res['matches']) && is_array($res['matches'])) ? $res['matches'] : array();

      $query_info = "Query '".htmlentities($qo)."' retrieved ".count($matches)." of $res[total_found] matches in $res[time] sec.\n";

      $resultCount = $res['total_found'];
      $numberOfPages = ceil($res['total']/$CONF['page_size']);

      //Build a list of IDs for use in the mysql Query and looping though the results
      $ids = array_keys($matches);
      if (empty($ids)) {
          print "<pre class=\"results\">No Results for '".htmlentities($qo)."'</pre>";
      }

      //We have results to display
      if (!empty($ids)) {

          //Fail with a clear message if the database settings were never filled in
          foreach (array('mysql_host','mysql_username','mysql_password','mysql_database') as $confKey) {
              if (!isset($CONF[$confKey]))
                  die("ERROR: database settings are not configured");
          }

          //Setup Database Connection (mysqli: the old mysql_* functions no longer exist in PHP 7+)
          //Reporting is switched off so failures are returned as FALSE and reach the "or die" handlers
          mysqli_report(MYSQLI_REPORT_OFF);
          $db = mysqli_connect($CONF['mysql_host'],$CONF['mysql_username'],$CONF['mysql_password']) or die("ERROR: unable to connect to database");
          mysqli_select_db($db, $CONF['mysql_database']) or die("ERROR: unable to select database");

          //Run the Mysql Query
          $sql = str_replace('$ids',implode(',',$ids),$CONF['mysql_query']);
          $result = mysqli_query($db, $sql) or die($CONF['debug']?("ERROR: mysql query failed: ".mysqli_error($db)):"ERROR: Please try later");

          //Fetch Results from Mysql (Store in an associative array, because they wont be in the right order)
          $rows = array();
          while ($row = mysqli_fetch_assoc($result)) {
              $rows[$row['id']] = $row;
          }
          mysqli_free_result($result);
          mysqli_close($db);

          //Keep (in sphinx order) only the ids that mysql actually returned a row for
          $found = array();
          foreach ($ids as $id) {
              if (isset($rows[$id]))
                  $found[] = $id;
          }

          if (!empty($found)) {

              //Call Sphinxes BuildExcerpts function
              $reply = array();
              if ($CONF['body'] == 'excerpt') {
                  $docs = array();
                  foreach ($found as $c => $id) {
                      $docs[$c] = strip_tags($rows[$id]['body']);
                  }
                  $excerpts = $cl->BuildExcerpts($docs, $CONF['sphinx_index'], $q);
                  //a non-array reply means the call failed; the plain body is shown instead
                  if (is_array($excerpts))
                      $reply = $excerpts;
              }

              if ($numberOfPages > 1 && $currentPage > 1) {
                  print "<p class='pages'>".pagesString($currentPage,$numberOfPages)."</p>";
              }

              //Actually display the Results
              print "<ol class=\"results\" start=\"".($currentOffset+1)."\">";
              foreach ($found as $c => $id) {
                  $row = $rows[$id];

                  $link = htmlentities(str_replace('$id',$row['id'],$CONF['link_format']));
                  print "<li><a href=\"$link\">".htmlentities($row['title'])."</a><br/>";

                  //excerpts are printed as returned by BuildExcerpts (not escaped here)
                  if ($CONF['body'] == 'excerpt' && !empty($reply[$c]))
                      print ($reply[$c])."</li>";
                  else
                      print htmlentities($row['body'])."</li>";
              }
              print "</ol>";

              if ($numberOfPages > 1) {
                  print "<p class='pages'>Page $currentPage of $numberOfPages. ";
                  printf("Result %d..%d of %d. ",($currentOffset)+1,min(($currentOffset)+$CONF['page_size'],$resultCount),$resultCount);
                  print pagesString($currentPage,$numberOfPages)."</p>";
              }

              print "<pre class=\"results\">$query_info</pre>";

          } else {

              //Error Message
              print "<pre class=\"results\">Unable to get results for '".htmlentities($qo)."'</pre>";

          }
      }
  }
  263.  
  264.  
  265.  
  266. #########################################
  267. # Functions
  268. # Created by Barry Hunter for use in the geograph.org.uk project, reused here because convenient :)
  269.  
  /**
   * Build an HTML-escaped link back to the current page with a modified query string.
   *
   * The query string of the current request ($_SERVER['REQUEST_URI']) is parsed and then
   * updated from $params:
   *  - array('name'=>N, 'value'=>V), when N is already present in the query string:
   *    N is set to V, or removed when V is the string 'null';
   *  - otherwise every key/value pair in $params is copied into the query string;
   *  - a 'delete' entry (a single key or an array of keys) removes those keys afterwards.
   *
   * @param array  $params   changes to apply to the current query string (see above)
   * @param string $selflink path to link to; defaults to $_SERVER['SCRIPT_NAME'],
   *                         and '/index.php' is shortened to '/'
   * @return string the path plus rebuilt query string, passed through htmlentities()
   */
  function linktoself($params,$selflink= '') {
      $a = array();
      $uri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
      //limit of 2: a literal '?' inside the query string must not cut the rest of it off
      $b = explode('?',$uri,2);
      if (isset($b[1]))
          parse_str($b[1],$a);

      if (isset($params['value']) && isset($params['name']) && isset($a[$params['name']])) {
          //strict comparison: only the literal string 'null' means "remove this parameter"
          if ($params['value'] === 'null') {
              unset($a[$params['name']]);
          } else {
              $a[$params['name']] = $params['value'];
          }

      } else {
          foreach ($params as $key => $value)
              $a[$key] = $value;
      }

      if (!empty($params['delete'])) {
          if (is_array($params['delete'])) {
              foreach ($params['delete'] as $del) {
                  unset($a[$del]);
              }
          } else {
              unset($a[$params['delete']]);
          }
          //the 'delete' instruction itself was copied in above and must not end up in the link
          unset($a['delete']);
      }
      if (empty($selflink)) {
          $selflink = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : '';
      }
      if ($selflink == '/index.php') {
          $selflink = '/';
      }

      return htmlentities($selflink.(!empty($a)?("?".http_build_query($a,'','&')):''));
  }
  307.  
  308.  
  /**
   * Build the HTML for a pager: "prev" link, a window of page links around the
   * current page (up to 5 before and 7 after), and a "next" link.
   *
   * Results are cached per distinct set of arguments, so repeating a call with the
   * same arguments reuses the HTML built the first time, while a call with different
   * arguments gets its own pager.
   *
   * @param int    $currentPage   1-based number of the page being shown (rendered as <b>)
   * @param int    $numberOfPages total number of pages
   * @param string $postfix       text appended to every generated href
   * @param string $extrahtml     extra markup inserted inside every <a ...> tag
   * @return string the pager HTML (an empty string when there is nothing to show)
   */
  function pagesString($currentPage,$numberOfPages,$postfix = '',$extrahtml ='') {
      static $cache = array();
      $cacheKey = serialize(array($currentPage,$numberOfPages,$postfix,$extrahtml));
      if (isset($cache[$cacheKey]))
          return $cache[$cacheKey];

      $r = '';

      if ($currentPage > 1)
          $r .= "<a href=\"".linktoself(array('page'=>$currentPage-1))."$postfix\"$extrahtml>&lt; &lt; prev</a> ";
      $start = max(1,$currentPage-5);
      //$endr is exclusive: one past the last page number that gets a link
      $endr = min($numberOfPages+1,$currentPage+8);

      if ($start > 1)
          $r .= "<a href=\"".linktoself(array('page'=>1))."$postfix\"$extrahtml>1</a> ... ";

      for($index = $start;$index<$endr;$index++) {
          if ($index == $currentPage)
              $r .= "<b>$index</b> ";
          else
              $r .= "<a href=\"".linktoself(array('page'=>$index))."$postfix\"$extrahtml>$index</a> ";
      }
      if ($endr < $numberOfPages+1)
          $r .= "... ";

      if ($numberOfPages > $currentPage)
          $r .= "<a href=\"".linktoself(array('page'=>$currentPage+1))."$postfix\"$extrahtml>next &gt;&gt;</a> ";

      $cache[$cacheKey] = $r;
      return $r;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement