Advertisement
Guest User

RC traffic stats calculation

a guest
Jul 12th, 2015
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 6.55 KB | None | 0 0
  1. <?php
  2.  
  // Reporting period and language code; these three values are substituted
  // into the stats.grok.se request path for every article.
  const STATS_MONTH = '06';    // MM
  const STATS_YEAR  = '2015';  // YYYY
  const STATS_LANG  = 'de';    // "en", "de", "fr", etc.
  6.  
  7. $articles = array('4-Hydroxycumarine',
  8.                   [..................]
  9.           'Refraktärmetalle');
  10.  
  11. // ---------------------------------------------
  12. // obviously, configurable stuff ends here
  13. // ---------------------------------------------
  14.  
  // Throttling: how many articles are requested in parallel, and how long
  // to pause between two consecutive batches.
  const CHUNK_SIZE  = 50;   // articles
  const CHUNK_SLEEP = 3;    // seconds

  // Runtime limits: no execution time limit, 64 MiB of memory and
  // a 90-second default socket timeout.
  set_time_limit(0);
  ini_set('memory_limit', '67108864');
  ini_set('default_socket_timeout', '90');
  21.  
  22. // a few small helper functions
  23.  
  /**
   * Formats a quantity together with its unit, e.g. "1,234 views".
   *
   * The number is rendered with number_format() (thousands separators, no
   * decimals) and an "s" is appended to the unit unless the absolute value
   * equals one.
   *
   * @param int|float $value quantity to print
   * @param string    $unit  singular form of the unit
   * @return string the formatted "<number> <unit>[s]" text
   */
  function plural_output($value, $unit) {
      $suffix = '';

      if (abs($value) != 1)
          $suffix = 's';

      return number_format($value) . ' ' . $unit . $suffix;
  }
  27.  
  /**
   * Prints a progress marker, but at most once every 0.5 seconds.
   *
   * The time of the last printed marker is remembered in a static variable,
   * so the throttle is shared by all callers.
   *
   * @param string $message text to print (a single dot by default)
   * @return bool true when the message was printed, false when it was
   *              suppressed by the throttle
   */
  function progress_message($message = '.') {
      static $last_message = null;

      $now = microtime(true);

      // throttled: the previous marker is not older than half a second
      if (($last_message !== null) && (($now - $last_message) <= 0.5))
          return false;

      echo $message;
      $last_message = $now;

      return true;
  }
  44.  
  45. // prepare the cURL handles for all articles
  46.  
  echo("\nFetching statistics data: ");

  $start_time  = microtime(true);   // used for the elapsed-time summary
  $handles     = array();           // one cURL easy handle per article
  $articles_no = count($articles);

  // The configured period is the current (still incomplete) month only when
  // BOTH the year and the month match today's date. Comparing the month alone
  // would treat e.g. June of a previous year as "current" whenever the script
  // runs in June. The integer casts keep "6" and "06" equivalent.
  $curr_month  = (((int) STATS_YEAR  === (int) @date('Y')) &&
                  ((int) STATS_MONTH === (int) @date('n')));

  if ($articles_no == 0)      // a small sanity check
      die("no articles specified!\n");

  if ($curr_month && ((int) @date('j') === 1))    // only the whole days are accounted
      die("no elapsed days in current month!\n");
  59.  
  // Build one easy handle per article up front; they are attached to the
  // multi handle later, one chunk at a time.
  for ($id = 0; $id < $articles_no; $id++) {
      $handles[$id] = curl_init();

      curl_setopt_array($handles[$id], array(
          // the title is percent-encoded, except that "/" stays literal
          CURLOPT_URL               => 'http://stats.grok.se/json/' . STATS_LANG .
                                       '/' . STATS_YEAR . STATS_MONTH .
                                       '/' . str_replace('%2F', '/', rawurlencode($articles[$id])),

          // return the response body as a string, without the headers
          CURLOPT_HEADER            => false,
          CURLOPT_RETURNTRANSFER    => true,

          // timeouts (seconds)
          CURLOPT_CONNECTTIMEOUT    => 20,
          CURLOPT_TIMEOUT           => 60,
          CURLOPT_DNS_CACHE_TIMEOUT => 3600,

          // allow connection reuse between requests
          CURLOPT_FORBID_REUSE      => false,
          CURLOPT_FRESH_CONNECT     => false,
          CURLOPT_MAXCONNECTS       => 10,
      ));
  }

  progress_message();
  80.  
  81. // run the cURL handles in chunks; otherwise, fetching data for a large number
  82. // of articles at once causes stats.grok.se to start refusing HTTP connections
  83.  
  $handle_all  = curl_multi_init();
  $chunks      = (int) ceil($articles_no / CHUNK_SIZE);   // number of batches to run
  $output      = array();     // article id => total views, or a negative error code
  $error_msgs  = array('Parsing JSON data failed' => -1);   // message => error code

  $total_views = 0;
  $failures    = 0;
  $today       = @date('Y-m-d');

  // curl_multi_setopt() is available since PHP 5.5.0. The version must be
  // compared as a whole: testing "major >= 5 && minor >= 5" separately is
  // false for 7.0-7.4 and 8.0-8.4, which silently skipped these options.
  if (version_compare(PHP_VERSION, '5.5.0', '>=')) {
      curl_multi_setopt($handle_all, CURLMOPT_PIPELINING, true);
      curl_multi_setopt($handle_all, CURLMOPT_MAXCONNECTS, 10);
  }
  99.  
  for ($chunk = 0; $chunk < $chunks; $chunk++) {      // fetch one chunk at a time
      $id_first = $chunk * CHUNK_SIZE;
      $id_limit = min($id_first + CHUNK_SIZE, $articles_no);

      for ($id = $id_first; $id < $id_limit; $id++)       // all articles in this chunk
          curl_multi_add_handle($handle_all, $handles[$id]);

      do {    // fetch the articles stats data in JSON format...
          $status = curl_multi_exec($handle_all, $running);
          progress_message();

          // Wait for socket activity instead of spinning at full CPU.
          // curl_multi_select() may return -1 immediately when it cannot
          // wait on the handles; back off briefly in that case.
          if (($status == CURLM_OK) && ($running > 0) &&
              (curl_multi_select($handle_all, 0.25) === -1))
              usleep(100000);

      // stop on a multi-level error as well, otherwise a persistent
      // error status with transfers still "running" would loop forever
      } while (($status == CURLM_CALL_MULTI_PERFORM) ||
               (($status == CURLM_OK) && ($running > 0)));

      for ($id = $id_first; $id < $id_limit; $id++) {     // ... and process it
          $raw  = curl_multi_getcontent($handles[$id]);
          $json = (is_string($raw) && ($raw !== ''))
                  ? json_decode($raw, true)
                  : null;

          // is the JSON Ok? (is_array() also rejects valid JSON scalars,
          // on which array_key_exists() must not be called)
          if (!is_array($json) ||
              !array_key_exists('daily_views', $json) ||
              !is_array($json['daily_views'])) {

              ++$failures;

              // Failures are stored as negative "view counts": -1 means the
              // JSON was unusable, values below -1 identify distinct cURL
              // error messages (curl_errno() always returns zero here).
              if (($message = curl_error($handles[$id])) != '') {
                  if (!array_key_exists($message, $error_msgs)) {
                      $errno = -1 * count($error_msgs) - 1;
                      $error_msgs[$message] = $errno;
                  }
                  else    // already seen
                      $errno = $error_msgs[$message];
              }
              else
                  $errno = -1;

              $output[$id] = $errno;
          }
          else {  // JSON data Ok
              $views = 0;

              foreach ($json['daily_views'] as $key => $value) {
                  if ($curr_month && ((string) $key === $today))
                      continue;   // account only the whole days

                  if (is_numeric($value))
                      $views += abs($value);  // just in case, should never be negative
              }

              $total_views += $views;
              $output[$id]  = $views;
          }

          curl_multi_remove_handle($handle_all, $handles[$id]);
          curl_close($handles[$id]);

          progress_message();
      }

      if ($chunk != ($chunks - 1)) {      // don't sleep after the last chunk
          $message = '#';
          $limit   = CHUNK_SLEEP * 4;     // quarter-second steps

          // "<" (not "<=") so the pause lasts exactly CHUNK_SLEEP seconds
          for ($i = 0; $i < $limit; $i++) {
              if (progress_message($message) === true)    // only one "marker"
                  $message = '.';

              usleep(250000);
          }
      }
  }

  curl_multi_close($handle_all);
  echo(" done.\n\n");
  167.  
  168. // done fetching all chunks of the stats data, generate and print the output...
  169.  
  // Highest view counts first; failures are stored as negative values,
  // so they end up at the bottom of the list.
  arsort($output, SORT_NUMERIC);

  $error_msgs = array_flip($error_msgs);  // error code => message
  $first_err  = true;

  foreach ($output as $id => $views) {
      if ($views < 0) {
          if ($first_err) {   // separate the failures from the results
              echo "\n";      // with a single empty line
              $first_err = false;
          }

          echo '> ' . $articles[$id] . ': failure (' . $error_msgs[$views] . ")\n";
          continue;
      }

      echo '- ' . $articles[$id] . ': total ' . plural_output($views, 'view') . "\n";
  }
  186.  
  187. // ... and the final summary
  188.  
  $articles_ok  = $articles_no - $failures;

  // number of days the totals cover: the whole month, or only the fully
  // elapsed days when the configured month is the current one
  $days         = !$curr_month
                  ? cal_days_in_month(CAL_GREGORIAN, (int) STATS_MONTH, (int) STATS_YEAR)
                  : ((int) @date('j') - 1);
  $month_name   = @date('F', @strtotime(STATS_YEAR . '-' . STATS_MONTH . '-01'));

  // $days can be 0 if the month rolled over while the script was running;
  // avoid dividing by zero in that case
  $daily_views  = ($days > 0) ? intval($total_views / $days) : 0;

  // Round the total first and split afterwards; rounding only the seconds
  // remainder could produce "1 minute and 60 seconds".
  $elapsed_time = microtime(true) - $start_time;
  $elapsed_all  = (int) round($elapsed_time);
  $elapsed_min  = intval($elapsed_all / 60);
  $elapsed_sec  = $elapsed_all % 60;

  echo("\nDone, {$month_name} " . STATS_YEAR . ' statistics for ' . plural_output($articles_ok, 'article') .
       ' fetched in ' . (($elapsed_min > 0)
                         ? (plural_output($elapsed_min, 'minute') . ' and ')
                         : '') .
       plural_output($elapsed_sec, 'second') . ".\n" .
       (($failures > 0)
        ? ('Fetching the views statistics failed for ' . plural_output($failures, 'article') . ".\n")
        : '') .
       'Total ' . plural_output($total_views, 'view') . ', averaging in ' .
       plural_output($daily_views, 'view') . ' per day (' .
       plural_output($days, ($curr_month ? 'whole ' : '') . 'day') .
       ' in ' . ($curr_month ? 'current' : 'that') . " month).\n\n");
  212.  
  213. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement