Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- define('STATS_MONTH', '06'); // month to report on, two digits (MM); appended to the year in the request URL
- define('STATS_YEAR', '2015'); // year to report on, four digits (YYYY)
- define('STATS_LANG', 'de'); // language code placed in the request URL: "en", "de", "fr", etc.
- $articles = array('4-Hydroxycumarine', // titles of the articles whose view statistics are fetched
- [..................] // NOTE(review): elision placeholder from the paste, not valid PHP; the remaining titles are not shown
- 'Refraktärmetalle');
- // ---------------------------------------------
- // user-configurable settings end here
- // ---------------------------------------------
// Runtime limits: no execution time cap, 64 MiB of memory and a 90-second
// default socket timeout.
ini_set('default_socket_timeout', 90);
ini_set('memory_limit', 64 * 1024 * 1024);
set_time_limit(0);

// Batching parameters for dispatching the requests.
define('CHUNK_SLEEP', 3); // seconds
define('CHUNK_SIZE', 50); // articles
- // a few small helper functions
// Formats a quantity followed by its unit, appending "s" to the unit unless
// the displayed quantity is exactly 1 or -1.
//
// $value - the number to print; rendered by number_format() without decimals
//          and with thousands separators (e.g. 1234 -> "1,234")
// $unit  - the singular form of the unit (e.g. "view")
//
// Returns the formatted string, e.g. "1 view" or "1,234 views".
function plural_output($value, $unit) {
    // number_format() rounds to a whole number, so the plural decision has to
    // be made on the rounded value too; otherwise 1.4 would print "1 views"
    $shown = round($value);
    $suffix = (abs($shown) != 1) ? 's' : '';
    return (number_format($value) . " {$unit}" . $suffix);
}
// Prints a progress marker, but no more often than once every 0.5 seconds.
//
// $message - the text to print (a single dot by default)
//
// Returns true when the message was printed, false when it was suppressed
// because the previous one was printed less than 0.5 seconds ago.
function progress_message($message = '.') {
    static $printed_at = null; // time of the last printed message, null before the first one
    $timestamp = microtime(true);
    $too_soon = ($printed_at !== null) && (($timestamp - $printed_at) <= 0.5);
    if ($too_soon) {
        return false;
    }
    echo $message;
    $printed_at = $timestamp;
    return true;
}
// prepare the cURL handles for all articles
echo("\nFetching statistics data: ");
$start_time = microtime(true);
$handles = array();
$articles_no = count($articles);
// the requested month is the running one only when both the year and the
// month match today's date; comparing the month alone would treat e.g.
// June 2015 as "current" whenever the script runs in any later June
$curr_month = ((intval(STATS_YEAR) == intval(@date('Y'))) &&
               (intval(STATS_MONTH) == intval(@date('m'))));
if ($articles_no == 0) // a small sanity check
    die("no articles specified!\n");
if ($curr_month && (@date('j') == 1)) // only the whole days are accounted
    die("no elapsed days in current month!\n");
// the part of the URL shared by all requests
$url_prefix = 'http://stats.grok.se/json/' . STATS_LANG .
              '/' . STATS_YEAR . STATS_MONTH . '/';
for ($id = 0; $id < $articles_no; $id++) {
    $handles[$id] = curl_init();
    curl_setopt_array($handles[$id], array(
        // slashes in the title stay literal, everything else is percent-encoded
        CURLOPT_URL => $url_prefix . str_replace('%2F', '/', rawurlencode($articles[$id])),
        CURLOPT_HEADER => false,
        CURLOPT_RETURNTRANSFER => true, // the body is read via curl_multi_getcontent()
        CURLOPT_CONNECTTIMEOUT => 20,
        CURLOPT_TIMEOUT => 60,
        CURLOPT_DNS_CACHE_TIMEOUT => 3600,
        CURLOPT_FORBID_REUSE => false,
        CURLOPT_FRESH_CONNECT => false,
        CURLOPT_MAXCONNECTS => 10
    ));
}
progress_message();
// run the cURL handles in chunks; otherwise, fetching data for a large number
// of articles at once causes stats.grok.se to start refusing HTTP connections
$handle_all = curl_multi_init();
$chunks = (int) ceil(1.0 * $articles_no / CHUNK_SIZE);
$output = array(); // article id => total views, or a negative error number
$error_msgs = array('Parsing JSON data failed' => -1); // message => error number
$total_views = 0;
$failures = 0;
$today = @date('Y-m-d');
// curl_multi_setopt() is available since PHP 5.5.0; compare the whole version,
// as testing major and minor separately rejects e.g. 7.0 or 8.3
if (version_compare(PHP_VERSION, '5.5.0', '>=')) {
    curl_multi_setopt($handle_all, CURLMOPT_PIPELINING, true);
    curl_multi_setopt($handle_all, CURLMOPT_MAXCONNECTS, 10);
}
for ($chunk = 0; $chunk < $chunks; $chunk++) { // fetch one chunk at a time
    $id_first = $chunk * CHUNK_SIZE;
    $id_limit = min(($chunk + 1) * CHUNK_SIZE, $articles_no);
    for ($id = $id_first; $id < $id_limit; $id++) // all articles in this chunk
        curl_multi_add_handle($handle_all, $handles[$id]);
    do { // fetch the articles stats data in JSON format...
        $status = curl_multi_exec($handle_all, $running);
        progress_message();
        // wait for socket activity instead of spinning on curl_multi_exec();
        // the short timeout keeps the progress dots coming, and the sleep
        // covers the case where select reports -1 without waiting
        if (($running > 0) && (curl_multi_select($handle_all, 0.25) === -1))
            usleep(100000);
    } while (($status == CURLM_CALL_MULTI_PERFORM) ||
             ($running > 0));
    for ($id = $id_first; $id < $id_limit; $id++) { // ... and process it
        $json = curl_multi_getcontent($handles[$id]);
        if (($json == '') || // is the JSON Ok?
            !is_array($json = json_decode($json, true)) || // null on failure, scalars are not usable either
            !array_key_exists('daily_views', $json) ||
            !is_array($json['daily_views'])) {
            ++$failures;
            if (($message = curl_error($handles[$id])) != '') { // for some reason, curl_errno()
                if (!array_key_exists($message, $error_msgs)) { // always returns zero here
                    $errno = -1 * count($error_msgs) - 1; // next free number below -1
                    $error_msgs[$message] = $errno;
                }
                else // already seen
                    $errno = $error_msgs[$message];
            }
            else // below -1 are cURL errors
                $errno = -1;
            $output[$id] = $errno;
        }
        else { // JSON data Ok
            $views = 0;
            foreach ($json['daily_views'] as $key => $value)
                if (!$curr_month || ($key != $today)) // account only the whole days
                    $views += abs($value); // just in case, should never be negative
            $total_views += $views;
            $output[$id] = $views;
        }
        curl_multi_remove_handle($handle_all, $handles[$id]);
        curl_close($handles[$id]);
        progress_message(); // done with this article
    }
    if ($chunk != ($chunks - 1)) { // don't sleep after the last chunk
        $message = '#';
        $limit = CHUNK_SLEEP * 4; // number of 0.25 second naps
        for ($i = 0; $i < $limit; $i++) {
            if (progress_message($message) === true) // only one "marker"
                $message = '.';
            usleep(250000);
        }
    }
}
curl_multi_close($handle_all);
echo " done.\n\n";
// all chunks are fetched; list the articles by descending view count, which
// also pushes the failures (negative error numbers) to the end of the list
$error_msgs = array_flip($error_msgs); // error number => message
arsort($output, SORT_NUMERIC);
$first_err = true;
foreach ($output as $id => $views) {
    $title = $articles[$id];
    if ($views < 0) {
        if ($first_err) { // an empty line separates the failures from the results
            echo "\n";
            $first_err = false;
        }
        echo '> ' . $title . ': failure (' . $error_msgs[$views] . ")\n";
        continue;
    }
    echo '- ' . $title . ': total ' . plural_output($views, 'view') . "\n";
}
// ... and the final summary
$articles_ok = $articles_no - $failures;
// for the running month only the fully elapsed days count
$days = !$curr_month
    ? cal_days_in_month(CAL_GREGORIAN, STATS_MONTH, STATS_YEAR)
    : (@date('j') - 1);
$month_name = @date('F', @strtotime(STATS_YEAR . '-' . STATS_MONTH . '-01'));
// $days is 0 when the run crossed midnight into the 1st of a month
$daily_views = ($days > 0) ? intval($total_views / $days) : 0;
// round the total first and split it afterwards, so that e.g. 119.6 seconds
// is reported as "2 minutes and 0 seconds" rather than "1 minute and 60 seconds"
$elapsed_time = (int) round(microtime(true) - $start_time);
$elapsed_min = intval($elapsed_time / 60);
$elapsed_sec = $elapsed_time % 60;
echo("\nDone, {$month_name} " . STATS_YEAR . ' statistics for ' . plural_output($articles_ok, 'article') .
     ' fetched in ' . (($elapsed_min > 0)
                       ? (plural_output($elapsed_min, 'minute') . ' and ')
                       : '') .
     plural_output($elapsed_sec, 'second') . ".\n" .
     (($failures > 0)
      ? ('Fetching the views statistics failed for ' . plural_output($failures, 'article') . ".\n")
      : '') .
     'Total ' . plural_output($total_views, 'view') . ', averaging in ' .
     plural_output($daily_views, 'view') . ' per day (' .
     plural_output($days, ($curr_month ? 'whole ' : '') . 'day') .
     ' in ' . ($curr_month ? 'current' : 'that') . " month).\n\n");
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement