Advertisement
ulfben

CureTogether CSV-parser (with caching)

Dec 2nd, 2011
265
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 3.86 KB | None | 0 0
  1. <?php
  2. /*
  3.     Download and extract data from CureTogether.com CSV files.
  4.         Optionally cache the data on the local drive.
  5.     See: http://weight.ulfben.com for an example.
  6. */
  7. error_reporting(E_ALL);
  8. ini_set('display_errors', '1');
  9. class Crawler {
  10.     private $_loginURL = 'http://curetogether.com/login.php';
  11.     private $_conf = array();
  12.     private $_cache = ''; //a filename to cache the results in
  13.     private $_expire_time = 86400; //24*60*60
  14.     function __construct($cache = '', $expire_time = 86400){
  15.         $this->_cache = $cache;
  16.         $this->_expire_time = $expire_time;
  17.         $cookies = 'cookies.txt';
  18.         $this->_conf = array(
  19.             CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)',
  20.             CURLOPT_HEADER => false,
  21.             CURLOPT_RETURNTRANSFER => true,
  22.             CURLOPT_COOKIEJAR  => $cookies,
  23.             CURLOPT_COOKIEFILE => $cookies,
  24.          );
  25.         @ unlink($cookies);
  26.     }
  27.     function login($username, $password){
  28.         $ch = curl_init();
  29.         $_conf = $this->_conf;
  30.         $_conf[CURLOPT_URL] = $this->_loginURL;
  31.         $_conf[CURLOPT_POST] = true;   
  32.         $_conf[CURLOPT_POSTFIELDS] = $this->getPostFields(array('loginEmail' =>  $username, 'loginPassword' => $password)); //login form fields, edit accordingly
  33.         $_conf[CURLOPT_FOLLOWLOCATION] = false;
  34.         curl_setopt_array($ch, $_conf);
  35.         curl_exec($ch);      
  36.         curl_close($ch);
  37.     }    
  38.     function get($url, $username, $password){
  39.         if($this->_cache && file_exists($this->_cache) && (time()-$this->_expire_time) < filemtime($this->_cache)){
  40.             return file_get_contents($this->_cache);
  41.         }      
  42.         if($username){
  43.             $this->login($username, $password);
  44.         }      
  45.         $ch = curl_init();        
  46.         $_conf = $this->_conf;
  47.         $_conf[CURLOPT_URL] = $url;
  48.         curl_setopt_array($ch, $_conf);
  49.         $results = curl_exec($ch);        
  50.         curl_close($ch);
  51.         $fh = @ fopen($this->_cache, 'w');
  52.         if($fh !== false){
  53.             fwrite($fh, (string) $results);
  54.             fclose($fh);
  55.         }          
  56.         return $results;
  57.     }
  58.     private function getPostFields($data){
  59.         $return = array();
  60.         foreach ($data as $key => $field){
  61.             $return[] = $key . '=' . urldecode($field);
  62.         }
  63.         return implode('&', $return);
  64.     }
  65. }
  66. $crawler = new Crawler('filename-for-cache.cvs');
  67. $cvs = $crawler->get('http://curetogether.com/home/tracking/dd/', 'username', 'password');
  68. $cvs = str_replace('"', '', $cvs); //all data fields are double-quoted
  69. $cvs = array_filter(explode("\n", $cvs));
  70.  
  71. //start the parsing
  72. $length = count($cvs);
  73. $date_label = 'Measure Date'; //the date column is assumed, and its label given by curetogether. used to find all other labels.
  74. $labels = array();
  75. $sets = array(); //the CVS holds all labels you're tracking at curetogether, so lets parse them all into sets.
  76. $first_line = 0; //the CVS starts with a bit of meta-data, then comes labels and then data.
  77. while(strpos($cvs[$first_line], $date_label) === false){
  78.     $first_line++;  //spin until we find the labels
  79. }
  80. $labels = explode(',', $cvs[$first_line]);
  81. foreach($labels as $label){
  82.     $sets[$label] = array();
  83. }  
  84. $columncount = count($labels);
  85.  
  86. //extract the data fields
  87. for($line = $first_line+1; $line < $length; $line++){ //data begins on the line after labels
  88.     $values = explode(",", $cvs[$line]);
  89.     $date = strtotime($values[0]); //assume first column is always the measuring date
  90.     if($date === false){continue;}
  91.     for($j = 1; $j < $columncount; $j++){ //get the rest of the columns.
  92.         $val = $values[$j];
  93.         if(empty($val)){continue;} 
  94.         $sets[$labels[$j]][$date] = $val;      
  95.     }  
  96. }
  97.  
  98. //pick the set you want, sort it and format for javascript output
  99. $weight = $sets['Weight']; 
  100. ksort($weight);
  101. $js_obj = '';
  102. foreach($weight as $timestamp => $kg){
  103.     $js_obj .= "[{$timestamp}000,{$kg}],"; //add 000 to timestamp for milliseconds.
  104. }
  105. $js_obj = '['. rtrim($js_obj, ',') . '];';
  106. ?>
  107.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement