Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /*
- Download and extract data from CureTogether.com CSV files.
- Optionally cache the data on local drive.
- See: http://weight.ulfben.com for an example.
- */
// Debug settings: print errors straight into the response and report every error level.
ini_set('display_errors', '1');
error_reporting(E_ALL);
/**
 * Small cURL wrapper that fetches a page, optionally submitting a login form first,
 * and optionally caches the response body in a local file.
 *
 * Session cookies are kept in "cookies.txt" (relative to the working directory) so the
 * login request and the following download share one session.
 */
class Crawler {
    // Login form endpoint the credentials are POSTed to.
    // NOTE(review): this is a plain http:// URL, so the credentials are sent unencrypted.
    private $_loginURL = 'http://curetogether.com/login.php';
    // cURL options shared by every request made through this object.
    private $_conf = array();
    private $_cache = ''; //a filename to cache the results in; empty string = no caching
    private $_expire_time = 86400; //24*60*60, max age of the cache file in seconds

    /**
     * @param string $cache       Path of the cache file, or '' to disable caching.
     * @param int    $expire_time Number of seconds a cache file is considered fresh.
     */
    public function __construct($cache = '', $expire_time = 86400){
        $this->_cache = $cache;
        $this->_expire_time = $expire_time;
        $cookies = 'cookies.txt';
        $this->_conf = array(
            CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)',
            CURLOPT_HEADER => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_COOKIEJAR => $cookies,
            CURLOPT_COOKIEFILE => $cookies,
        );
        // Start from a clean session; the file may legitimately not exist yet,
        // hence the suppressed warning.
        @ unlink($cookies);
    }

    /**
     * POSTs the credentials to the login form so the session cookie ends up in the cookie jar.
     * The response body is discarded and redirects are not followed.
     *
     * @param string $username Value for the "loginEmail" form field.
     * @param string $password Value for the "loginPassword" form field.
     */
    public function login($username, $password){
        $ch = curl_init();
        $_conf = $this->_conf;
        $_conf[CURLOPT_URL] = $this->_loginURL;
        $_conf[CURLOPT_POST] = true;
        $_conf[CURLOPT_POSTFIELDS] = $this->getPostFields(array('loginEmail' => $username, 'loginPassword' => $password)); //login form fields, edit accordingly
        $_conf[CURLOPT_FOLLOWLOCATION] = false;
        curl_setopt_array($ch, $_conf);
        curl_exec($ch);
        curl_close($ch);
    }

    /**
     * Returns the body of $url, served from the cache file while that file is fresh.
     *
     * @param string $url      Address to download.
     * @param string $username Login name; a falsy value skips the login step.
     * @param string $password Login password (only used when $username is given).
     * @return string|false Response body, or false when the download (or cache read) failed.
     */
    public function get($url, $username, $password){
        $cacheEnabled = ((string) $this->_cache !== '');
        if($cacheEnabled && file_exists($this->_cache) && (time()-$this->_expire_time) < filemtime($this->_cache)){
            return file_get_contents($this->_cache);
        }
        if($username){
            $this->login($username, $password);
        }
        $ch = curl_init();
        $_conf = $this->_conf;
        $_conf[CURLOPT_URL] = $url;
        curl_setopt_array($ch, $_conf);
        $results = curl_exec($ch);
        curl_close($ch);
        // Only cache real responses: writing a failed transfer would store an empty file
        // that is then treated as fresh data for the whole expiry window.
        if($cacheEnabled && $results !== false){
            // Caching is best-effort; an unwritable path must not break the download.
            @ file_put_contents($this->_cache, $results, LOCK_EX);
        }
        return $results;
    }

    /**
     * Builds an application/x-www-form-urlencoded request body.
     *
     * @param array $data Map of form field name => raw value.
     * @return string Encoded "key=value" pairs joined with "&".
     */
    private function getPostFields($data){
        $return = array();
        foreach ($data as $key => $field){
            // Values must be ENcoded; otherwise characters such as & = + % in a
            // password change the meaning of the request body.
            $return[] = urlencode($key) . '=' . urlencode($field);
        }
        return implode('&', $return);
    }
}
// Download the CSV export (or read it from the local cache file while that is fresh).
$crawler = new Crawler('filename-for-cache.cvs');
$cvs = $crawler->get('http://curetogether.com/home/tracking/dd/', 'username', 'password');

// Split into lines, strip surrounding whitespace (this also removes the "\r" of CRLF
// line endings) and drop blank lines. array_values() re-indexes the result:
// array_filter() preserves keys, and the index-based loops below need a gap-free list.
// A failed download (false) becomes an empty list.
$cvs = array_values(array_filter(array_map('trim', explode("\n", (string) $cvs)), 'strlen'));

//start the parsing
$length = count($cvs);
$date_label = 'Measure Date'; //the date column is assumed, and its label given by curetogether. used to find all other labels.
$labels = array();
$sets = array(); //the CSV holds all labels you're tracking at curetogether, so lets parse them all into sets.
$first_line = 0; //the CSV starts with a bit of meta-data, then comes labels and then data.
while($first_line < $length && strpos($cvs[$first_line], $date_label) === false){
    $first_line++; //spin until we find the labels (or run out of lines)
}

// $first_line === $length means no header row was found (e.g. the login failed and an
// HTML page came back); $labels/$sets then stay empty instead of reading past the end.
if($first_line < $length){
    // str_getcsv handles the double-quoted fields, including quoted fields that contain commas.
    $labels = str_getcsv($cvs[$first_line], ',', '"', '\\');
    foreach($labels as $label){
        $sets[$label] = array();
    }
}
$columncount = count($labels);

//extract the data fields
for($line = $first_line+1; $line < $length; $line++){ //data begins on the line after labels
    $values = str_getcsv($cvs[$line], ',', '"', '\\');
    $date = strtotime((string) $values[0]); //assume first column is always the measuring date
    if($date === false){continue;}
    for($j = 1; $j < $columncount; $j++){ //get the rest of the columns.
        if(!isset($values[$j])){continue;} //row is shorter than the header
        $val = trim($values[$j]);
        if($val === ''){continue;} //skip missing measurements, but keep a literal "0"
        $sets[$labels[$j]][$date] = $val;
    }
}

//pick the set you want, sort it and format for javascript output
$weight = isset($sets['Weight']) ? $sets['Weight'] : array();
ksort($weight); //chronological order (keys are unix timestamps)
$js_obj = '';
foreach($weight as $timestamp => $kg){
    if(!is_numeric($kg)){continue;} //the value is emitted verbatim into javascript, so only allow numbers
    $js_obj .= "[{$timestamp}000,{$kg}],"; //add 000 to timestamp for milliseconds.
}
$js_obj = '['. rtrim($js_obj, ',') . '];';
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement