SHARE
TWEET

CureTogether CSV-parser (with caching)

ulfben Dec 2nd, 2011 115 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2. /*
  3.         Download and extract data from CureTogether.com CSV files.
  4.                 Optionally cache the data on local drive.
  5.         See: http://weight.ulfben.com for an example.
  6. */
  // Development settings: print problems straight into the output and
  // report every category of notice, warning and error.
  ini_set('display_errors', '1');
  error_reporting(E_ALL);
  /**
   * Small cURL-based downloader with optional login and an optional
   * file cache for the downloaded body.
   *
   * Typical use:
   *     $crawler = new Crawler('cache-file.csv');
   *     $body = $crawler->get($url, $username, $password);
   *
   * NOTE(review): the login URL is plain http://, so the credentials are
   * posted unencrypted. Switch to https:// if the site supports it.
   */
  class Crawler {
      /** URL the login form is posted to. */
      private $_loginURL = 'http://curetogether.com/login.php';

      /** Cookie jar shared by the login request and the download request. */
      private $_cookieFile = 'cookies.txt';

      /** File name the downloaded body is cached in; '' disables caching. */
      private $_cache = '';

      /** Maximum age of the cache file in seconds (default 24*60*60). */
      private $_expire_time = 86400;

      /**
       * @param string $cache       File name to cache results in; '' for no caching.
       * @param int    $expire_time Cache lifetime in seconds.
       */
      public function __construct($cache = '', $expire_time = 86400){
          $this->_cache = (string) $cache;
          $this->_expire_time = (int) $expire_time;

          // Start every run with a clean session. Removal is best-effort:
          // a leftover jar we cannot delete is not worth aborting for.
          if (is_file($this->_cookieFile)) {
              @unlink($this->_cookieFile);
          }
      }

      /**
       * Post the login form so the session cookie ends up in the cookie jar.
       *
       * @param string $username Value for the 'loginEmail' form field.
       * @param string $password Value for the 'loginPassword' form field.
       * @return bool False when the HTTP request itself failed. A true result
       *              only means a response arrived, not that the site accepted
       *              the credentials.
       */
      public function login($username, $password){
          $ch = curl_init();
          curl_setopt_array($ch, $this->curlOptions(array(
              CURLOPT_URL => $this->_loginURL,
              CURLOPT_POST => true,
              // login form fields, edit accordingly
              CURLOPT_POSTFIELDS => $this->getPostFields(array(
                  'loginEmail' => $username,
                  'loginPassword' => $password,
              )),
              CURLOPT_FOLLOWLOCATION => false,
          )));
          $response = curl_exec($ch);
          // Closing the handle is what makes cURL write the cookie jar.
          curl_close($ch);

          return $response !== false;
      }

      /**
       * Return the body of $url, served from the cache file while it is fresh.
       *
       * @param string $url      Address to download.
       * @param string $username Login name; pass an empty value to skip the login step.
       * @param string $password Login password.
       * @return string|false The body, or false when the download failed.
       */
      public function get($url, $username, $password){
          if ($this->isCacheFresh()) {
              $cached = file_get_contents($this->_cache);
              if ($cached !== false) {
                  return $cached;
              }
              // Unreadable cache file: fall through and download again.
          }

          if ($username) {
              $this->login($username, $password);
          }

          $ch = curl_init();
          curl_setopt_array($ch, $this->curlOptions(array(CURLOPT_URL => $url)));
          $results = curl_exec($ch);
          curl_close($ch);

          // Only cache real content. Caching a failed or empty download would
          // keep serving that bad result until the cache expires.
          if ($this->_cache !== '' && $results !== false && $results !== '') {
              // Best-effort: an unwritable cache must not break the download.
              @file_put_contents($this->_cache, $results, LOCK_EX);
          }

          return $results;
      }

      /**
       * Tell whether a cache file is configured, exists, and is younger than
       * the configured lifetime.
       *
       * @return bool
       */
      private function isCacheFresh(){
          if ($this->_cache === '' || !is_file($this->_cache)) {
              return false;
          }
          return (time() - filemtime($this->_cache)) < $this->_expire_time;
      }

      /**
       * Build the option array for one request: the shared defaults plus the
       * request-specific $extra options, which take precedence.
       *
       * Built on demand so that constructing the object or serving a cache
       * hit never touches the cURL extension.
       *
       * @param array $extra CURLOPT_* => value pairs for this request.
       * @return array
       */
      private function curlOptions($extra = array()){
          $defaults = array(
              CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)',
              CURLOPT_HEADER => false,
              CURLOPT_RETURNTRANSFER => true,
              CURLOPT_COOKIEJAR  => $this->_cookieFile,
              CURLOPT_COOKIEFILE => $this->_cookieFile,
          );
          // Array union, not array_merge(): the CURLOPT_* keys are integers
          // and array_merge() would renumber them.
          return $extra + $defaults;
      }

      /**
       * Encode form fields as an application/x-www-form-urlencoded string.
       *
       * Keys and values are URL-encoded, so characters such as '&', '=',
       * '+' and '%' in a password survive the round trip.
       *
       * @param array $data Field name => value pairs.
       * @return string
       */
      private function getPostFields($data){
          // The separator is passed explicitly so the result does not depend
          // on the arg_separator.output ini setting.
          return http_build_query($data, '', '&');
      }
  }
  // Download the export (or read it from the local cache).
  $crawler = new Crawler('filename-for-cache.cvs');
  $cvs = $crawler->get('http://curetogether.com/home/tracking/dd/', 'username', 'password');

  // get() yields false when the download failed; treat that as "no lines".
  // Split on any line-ending style, drop blank lines and re-index so the
  // index-based loops below see a gap-free list.
  $cvs = preg_split('/\r\n|\r|\n/', (string) $cvs);
  $cvs = array_values(array_filter(array_map('trim', $cvs), 'strlen'));

  //start the parsing
  $length = count($cvs);
  $date_label = 'Measure Date'; //the date column is assumed, and its label given by curetogether. used to find all other labels.
  $labels = array();
  $sets = array(); //the CSV holds all labels you're tracking at curetogether, so lets parse them all into sets.
  $first_line = 0; //the CSV starts with a bit of meta-data, then comes labels and then data.

  // Spin until we find the label line, but stop at the end of the data so a
  // missing label line cannot loop forever.
  while ($first_line < $length && strpos($cvs[$first_line], $date_label) === false) {
      $first_line++;
  }

  if ($first_line < $length) {
      // str_getcsv() understands the double-quoted fields, including quoted
      // fields that contain commas. The escape character is passed explicitly
      // because relying on its default is deprecated in newer PHP versions.
      $labels = array_map('trim', str_getcsv($cvs[$first_line], ',', '"', '\\'));
  }
  foreach ($labels as $label) {
      $sets[$label] = array();
  }
  $columncount = count($labels);

  //extract the data fields
  for ($line = $first_line + 1; $line < $length; $line++) { //data begins on the line after labels
      $values = str_getcsv($cvs[$line], ',', '"', '\\');

      //assume first column is always the measuring date
      $date = isset($values[0]) ? strtotime(trim($values[0])) : false;
      if ($date === false) {
          continue;
      }

      for ($j = 1; $j < $columncount; $j++) { //get the rest of the columns.
          // Rows may be shorter than the label row; treat missing cells as empty.
          $val = isset($values[$j]) ? trim($values[$j]) : '';
          // Compare against '' rather than empty() so a recorded "0" is kept.
          if ($val === '') {
              continue;
          }
          $sets[$labels[$j]][$date] = $val;
      }
  }

  //pick the set you want, sort it and format for javascript output
  $weight = isset($sets['Weight']) ? $sets['Weight'] : array();
  ksort($weight);
  $points = array();
  foreach ($weight as $timestamp => $kg) {
      // The values come from a remote file and are written into JavaScript
      // verbatim, so only let plain numbers through.
      if (!is_numeric($kg)) {
          continue;
      }
      $points[] = "[{$timestamp}000,{$kg}]"; //add 000 to timestamp for milliseconds.
  }
  $js_obj = '[' . implode(',', $points) . '];';
  106. ?>
  107.  
RAW Paste Data
Want to get better at PHP?
Learn to code PHP in 2017
Pastebin PRO Summer Special!
Get 40% OFF on Pastebin PRO accounts!
Top