Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
// Load the run configuration from conf.ini (resolved against the current
// working directory). parse_ini_file() returns false when the file is missing
// or malformed; stop right away in that case instead of failing later on
// undefined settings.
$aConf = parse_ini_file('conf.ini');
if ($aConf === false) {
    file_put_contents('php://stderr', "Unable to read or parse conf.ini\n");
    exit(1);
}

// Remove the execution time limit and raise the memory ceiling for this run.
set_time_limit(0);
ini_set('memory_limit', '700M');
/**
 * Returns the current Unix timestamp with microsecond resolution.
 *
 * @return float Seconds since the Unix epoch, including the fractional part
 */
function microtime_float()
{
    // microtime(true) yields the float directly, so the "usec sec" string
    // does not have to be split and summed by hand.
    return microtime(true);
}
/**
 * Formats a byte count as a human-readable string, e.g. 1536 => "1.5 kb".
 *
 * The value is scaled by powers of 1024 and rounded to two decimals. Values
 * below 1024 (including 0) are reported in bytes; values beyond the largest
 * known unit stay expressed in that unit.
 *
 * @param int|float $size Size in bytes
 * @return string Scaled value followed by a space and the unit
 */
function convert($size)
{
    $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');
    $iLastUnit = count($unit) - 1;

    // Repeated division by 1024 is exact in binary floating point, so sizes
    // sitting on a unit boundary never slip into the neighbouring unit, and
    // no logarithm of zero is ever taken.
    $i = 0;
    while ($size >= 1024 && $i < $iLastUnit) {
        $size /= 1024;
        $i++;
    }

    return round($size, 2) . ' ' . $unit[$i];
}
// Timestamp taken before any work starts; the end of the script subtracts it
// from a second reading to obtain the elapsed run time.
$time_start = microtime_float();
/**
 * Collects word n-grams ("phrases") from text and counts their occurrences.
 *
 * Feed text with addText(); read the counters with getKeywords(), or export
 * them as a MySQL script with getSql() / sendToStdout().
 */
class InvicoKeywords
{
    /**
     * @var int Maximum number of words in a phrase
     */
    private $_iMaxWords = 5;

    /**
     * A word may start a phrase only when it is strictly longer than this
     * many bytes (with the value 2, the first word needs at least 3 bytes).
     *
     * @var int
     */
    private $_iMinWordLength = 2;

    /**
     * @var array Phrase counters (phrase => frequency)
     */
    private $_aPhrases = array();

    /**
     * @var string Name of the table written by getSql()
     */
    private $_tableName = 'keywords';

    /**
     * A phrase is exported only when its frequency is strictly greater than
     * this value.
     *
     * @var int
     */
    private $_iMinFreq = 2;

    /**
     * Sets the name of the table used in the generated SQL.
     *
     * @param string $name
     */
    public function setTableName($name)
    {
        $this->_tableName = $name;
    }

    /**
     * Increments the counter of a phrase, creating it on first sight.
     *
     * @param string $sPhrase
     */
    private function _countPhrase($sPhrase)
    {
        if (isset($this->_aPhrases[$sPhrase])) {
            $this->_aPhrases[$sPhrase]++;
        } else {
            $this->_aPhrases[$sPhrase] = 1;
        }
    }

    /**
     * Tells whether a word may be the first word of a phrase.
     *
     * @param string $word
     * @return bool False for numeric words and for words that are too short
     */
    private function _allowParse($word)
    {
        if (is_numeric($word)) {
            return false;
        }

        // isset() on a string offset is a cheap length test: offset N exists
        // only when the word has more than N bytes.
        // NOTE(review): this rejects words of exactly _iMinWordLength bytes;
        // confirm whether "minimum length" was meant to be inclusive.
        return isset($word[$this->_iMinWordLength]);
    }

    /**
     * Counts every phrase of 1.._iMaxWords consecutive words that starts at
     * an allowed word. Only the first word of a phrase is filtered; the
     * following words are taken as they come.
     *
     * @param array $aWordArray Words in text order, indexed from 0
     */
    private function _parseArray($aWordArray)
    {
        $iCount = count($aWordArray);

        for ($i = 0; $i < $iCount; $i++) {
            if (!$this->_allowParse($aWordArray[$i])) {
                continue;
            }

            // Grow the phrase one word at a time instead of rebuilding it
            // from scratch for every length; stop at the end of the text.
            $sPhrase = $aWordArray[$i];
            $this->_countPhrase($sPhrase);

            $iEnd = min($i + $this->_iMaxWords, $iCount);
            for ($k = $i + 1; $k < $iEnd; $k++) {
                $sPhrase .= ' ' . $aWordArray[$k];
                $this->_countPhrase($sPhrase);
            }
        }
    }

    /**
     * Parses the given text and adds its phrases to the counters.
     *
     * Tags are stripped and the text is lower-cased, then it is split into
     * runs of word characters (\w).
     * NOTE(review): strtolower() and \w without the "u" modifier work on
     * bytes, so non-ASCII letters presumably split words; confirm whether the
     * input can contain them.
     *
     * @param string $sText
     */
    public function addText($sText)
    {
        $sPlain = strtolower(trim(strip_tags((string) $sText)));

        // preg_match_all() returns 0 when nothing matches and false on error;
        // in both cases there is nothing to count.
        if (preg_match_all('/\w+/', $sPlain, $aMatches) > 0) {
            $this->_parseArray($aMatches[0]);
        }
    }

    /**
     * Returns all counted phrases.
     *
     * @return array phrase => frequency
     */
    public function getKeywords()
    {
        return $this->_aPhrases;
    }

    /**
     * Builds a MySQL script that recreates the keyword table and inserts
     * every phrase whose frequency is greater than _iMinFreq.
     *
     * Rows are grouped into multi-row INSERT statements, and every statement,
     * including the last one, is terminated with ";".
     *
     * @return string
     */
    public function getSql()
    {
        // Quote the identifier the same way in every statement; a backtick
        // inside the name is escaped by doubling it.
        $sTable = '`' . str_replace('`', '``', $this->_tableName) . '`';

        $sSql = "\n"
            . "DROP TABLE IF EXISTS {$sTable};\n"
            . "CREATE TABLE {$sTable} (\n"
            . "    `keyword` varchar(255) NOT NULL,\n"
            . "    `freq` int(11) NOT NULL\n"
            . ") ENGINE=MyISAM DEFAULT CHARSET=utf8;\n";

        // Number of rows already written into the currently open INSERT.
        $iRows = 0;
        foreach ($this->_aPhrases as $keyword => $freq) {
            if ($freq <= $this->_iMinFreq) {
                continue;
            }

            $sSql .= ($iRows === 0) ? "\nINSERT INTO {$sTable} VALUES " : ",\n";

            // Phrases are built from \w runs joined by single spaces (see
            // addText), so they cannot contain quotes or backslashes.
            $sSql .= "('" . $keyword . "', $freq)";
            $iRows++;

            // Close the statement once it holds more than 1000 rows.
            if ($iRows > 1000) {
                $sSql .= ";\n";
                $iRows = 0;
            }
        }

        // Terminate the last, partially filled statement as well.
        if ($iRows !== 0) {
            $sSql .= ";\n";
        }

        return $sSql;
    }

    /**
     * Writes the generated SQL to php://stdout.
     *
     * @throws RuntimeException When the output stream cannot be opened
     */
    public function sendToStdout()
    {
        $out = fopen('php://stdout', 'w');
        if ($out === false) {
            throw new RuntimeException('Unable to open php://stdout for writing');
        }

        fwrite($out, $this->getSql());
        fclose($out);
    }
}
// Main run: read the source rows chunk by chunk, feed them to a single
// keyword parser and dump the collected statistics as SQL on stdout.
//
// Settings read from $aConf: mysql.host, mysql.user, mysql.pass, mysql.dbname,
// query.count (must return a column named "number"), query.data (must return
// a column named "mytext") and limit (rows per chunk).
try {
    $iLimit = isset($aConf['limit']) ? (int) $aConf['limit'] : 0;
    if ($iLimit < 1) {
        // The limit is used as a divisor and as the LIMIT row count.
        throw new RuntimeException('conf.ini: "limit" must be a positive integer');
    }

    // The mysql_* extension no longer exists in PHP 7+; mysqli offers the
    // same procedural workflow.
    // NOTE(review): unlike mysql_connect(), mysqli_connect() does not accept
    // "host:port" in the host argument; verify the mysql.host value.
    $oDb = mysqli_connect(
        $aConf['mysql.host'],
        $aConf['mysql.user'],
        $aConf['mysql.pass'],
        $aConf['mysql.dbname']
    );
    if (!$oDb) {
        throw new RuntimeException('MySQL connection failed: ' . mysqli_connect_error());
    }

    $oCountResult = mysqli_query($oDb, $aConf['query.count']);
    if (!$oCountResult) {
        throw new RuntimeException('Count query failed: ' . mysqli_error($oDb));
    }
    $oCount = mysqli_fetch_object($oCountResult);
    mysqli_free_result($oCountResult);

    $iChunks = $oCount ? (int) ceil($oCount->number / $iLimit) : 0;

    // One parser for the whole run: creating it inside the loop would throw
    // away the counts of every chunk but the last, and would leave it
    // undefined when there are no rows at all.
    $oKeywordParser = new InvicoKeywords();

    for ($iChunk = 0; $iChunk < $iChunks; $iChunk++) {
        $iFrom = $iChunk * $iLimit;

        // NOTE(review): LIMIT paging is only stable if query.data has a
        // deterministic ORDER BY; confirm in conf.ini.
        $oResult = mysqli_query($oDb, $aConf['query.data'] . " limit $iFrom,$iLimit");
        if (!$oResult) {
            throw new RuntimeException('Data query failed: ' . mysqli_error($oDb));
        }

        while ($oRow = mysqli_fetch_object($oResult)) {
            $oKeywordParser->addText($oRow->mytext);
        }

        // Release each chunk as soon as it is processed.
        mysqli_free_result($oResult);
    }

    mysqli_close($oDb);
} catch (Exception $e) {
    file_put_contents('php://stderr', $e->getMessage() . "\n");
    exit(1);
}

$oKeywordParser->sendToStdout();

// Elapsed run time in seconds. It is not printed: stdout carries the SQL
// dump, so any diagnostics built from it should be written to stderr.
$time_end = microtime_float();
$time = $time_end - $time_start;
Add Comment
Please, Sign In to add comment