Guest User

Untitled

a guest
Dec 18th, 2018
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.00 KB | None | 0 0
  1. <?php
  2.  
  3. $aConf = parse_ini_file('conf.ini');
  4.  
  5. set_time_limit(0);
  6. ini_set('memory_limit', '700M');
  7.  
  8. function microtime_float()
  9. {
  10. list($usec, $sec) = explode(" ", microtime());
  11. return ((float) $usec + (float) $sec);
  12. }
  13.  
  14. function convert($size)
  15. {
  16. $unit = array('b', 'kb', 'mb', 'gb', 'tb', 'pb');
  17. return @round($size / pow(1024, ($i = floor(log($size, 1024)))), 2) . ' ' . $unit[$i];
  18. }
  19. $time_start = microtime_float();
  20.  
  21.  
  22. class InvicoKeywords
  23. {
  24.  
  25. /**
  26. *
  27. * @var int Max word for suggest phrase
  28. */
  29. private $_iMaxWords = 5;
  30. /**
  31. *
  32. * @var int min Word length for suggest word
  33. */
  34. private $_iMinWordLength = 2;
  35. /**
  36. *
  37. * @var array List of phrases (phrase => freq)
  38. */
  39. private $_aPhrases = array();
  40. private $_aPhrasesTest = array();
  41. /**
  42. *
  43. * @var string uggest table name
  44. */
  45. private $_tableName = 'keywords';
  46. /**
  47. *
  48. * @var type minimum freq for adding
  49. */
  50. private $_iMinFreq = 2;
  51.  
  52. /**
  53. *
  54. * @param string $name
  55. */
  56. public function setTableName($name)
  57. {
  58. $this->_tableName = $name;
  59. }
  60.  
  61. private function _addPhrase($i, $j, $aWordArray)
  62. {
  63. $sWord = '';
  64. $bAdd = true;
  65. for ($s = 0; $s <= $j; $s++) {
  66. $is = $i + $s;
  67. if (isset($aWordArray[$is])) {
  68. $sWord .= " " . $aWordArray[$is];
  69. }
  70. else {
  71. $bAdd = false;
  72. }
  73. }
  74. $sWord = trim($sWord);
  75. if ($bAdd) {
  76. if (isset($this->_aPhrases[$sWord])) {
  77. $this->_aPhrases[$sWord]++;
  78. }
  79. else {
  80. $this->_aPhrases[$sWord] = 1;
  81. }
  82. }
  83. }
  84.  
  85. private function _allowParse($word)
  86. {
  87. if (is_numeric($word)) {
  88. return false;
  89. }
  90. //elseif (strlen($word) < $this->_iMinWordLength) {
  91. elseif (!isset($word{$this->_iMinWordLength})) {
  92. return false;
  93. }
  94. return true;
  95. }
  96.  
  97. private function _parseArray($aWordArray)
  98. {
  99. $i = 0;
  100. while (isset($aWordArray[$i])) {
  101. if ($this->_allowParse($aWordArray[$i])) {
  102. $j = 0;
  103. while ($j < $this->_iMaxWords) {
  104. $this->_addPhrase($i, $j, $aWordArray);
  105. $j++;
  106. }
  107. }
  108. $i++;
  109. }
  110. }
  111.  
  112. /**
  113. * Parse givven text
  114. *
  115. * @param string $sText
  116. */
  117. public function addText($sText)
  118. {
  119. $aTextList = strtolower(trim(strip_tags($sText)));
  120.  
  121. preg_match_all("/[\w]+/", $aTextList, $matches);
  122.  
  123. $this->_parseArray($matches[0]);
  124. }
  125.  
  126. /**
  127. *
  128. * @return array
  129. */
  130. public function getKeywords()
  131. {
  132. return $this->_aPhrases;
  133. }
  134.  
  135. /**
  136. *
  137. * @return string
  138. */
  139. function getSql()
  140. {
  141. $sSql = "
  142. DROP TABLE IF EXISTS {$this->_tableName};
  143.  
  144. CREATE TABLE `{$this->_tableName}` (
  145. `keyword` varchar(255) NOT NULL,
  146. `freq` int(11) NOT NULL
  147. ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
  148. ";
  149.  
  150. $i = 0;
  151. foreach ($this->getKeywords() as $keyword => $freq) {
  152. if ($freq > $this->_iMinFreq) {
  153. if ($i != 0) {
  154. $sSql .= ",\n";
  155. }
  156. else {
  157. $sSql .= "\nINSERT INTO {$this->_tableName} VALUES ";
  158. }
  159. $sSql .= "('" . $keyword . "', $freq)";
  160. $i++;
  161. if ($i > 1000) {
  162. $sSql .= ";\n";
  163. $i = 0;
  164. }
  165. }
  166. }
  167.  
  168. return $sSql;
  169. }
  170.  
  171. /**
  172. * Sending data to php://stdout
  173. */
  174. public function sendToStdout()
  175. {
  176. $out = fopen("php://stdout", "w+");
  177.  
  178. fwrite($out, $this->getSql());
  179.  
  180. fclose($out);
  181. }
  182.  
  183. }
  184.  
  185. mysql_connect($aConf['mysql.host'], $aConf['mysql.user'], $aConf['mysql.pass']);
  186. mysql_select_db($aConf['mysql.dbname']);
  187.  
  188. $i = mysql_fetch_object(mysql_query($aConf['query.count']));
  189. $chunks = ceil($i->number / $aConf['limit']);
  190.  
  191. for ($i=0; $i<$chunks; $i++) {
  192. $from = $i * $aConf['limit'];
  193. $oResult = mysql_query($aConf['query.data'] . " limit $from," . $aConf['limit']);
  194. $oKeywordParser = new InvicoKeywords();
  195. while ($obj = mysql_fetch_object($oResult)) {
  196. $oKeywordParser->addText($obj->mytext);
  197. unset($obj);
  198. }
  199. }
  200. mysql_free_result($oResult);
  201.  
  202. $oKeywordParser->sendToStdout();
  203.  
  204. $time_end = microtime_float();
  205. $time = $time_end - $time_start;
  206.  
  207. //echo "Did nothing in $time seconds\n";
  208. //echo "Peak: " . convert(memory_get_peak_usage(true)) . " Usage: " . convert(memory_get_usage(true)) . "\n";
Add Comment
Please, Sign In to add comment