Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/php
- <?php
/**
 * WSQ format to parsed, serialized PHP hash utility
 *
 * Script for preprocessing FEQ simulation output files, of *.wsq
 * format, for use within Charts. Completely parses the input files
 * and generates files containing serialized PHP arrays in the format
 * expected by the Highcharts JavaScript library.
 *
 * @see expected_usage() for the expected command-line invocation
 *
 * @author Paul Lambert <pmlambert @ usgs.gov>
 * @package charts
 * @note written for PHP 4.3.9
 */
////////////////////////////
//// SETUP
////////////////////////////

/**
 * C-style status codes used as function return values throughout this
 * script (desirable due to PHP < 5's lack of exceptions). The value
 * returned by main() is also used as the process exit status.
 *
 * @var int EXIT_SUCCESS
 * @var int EXIT_FAILURE
 */
define("EXIT_SUCCESS", 0);
define("EXIT_FAILURE", 1);

// convenient for development environment
// XXX set to false for server
define("DEBUG", true);

if (DEBUG) {
    // E_ALL already contains E_WARNING, so no need to OR it in again
    error_reporting(E_ALL);
    ini_set('display_errors', 1);
}

// let php declare argc, argv
ini_set("register_argc_argv", "1");

// let php have unlimited memory and execution time --
// this script should work flawlessly with any number of input
// files. (historical data, for example)
ini_set("memory_limit", "-1");
ini_set("max_execution_time", "0");

/**
 * Initialization point.
 *
 * exit() with a string argument prints it and always terminates with
 * status 0, which would hide main()'s result from the calling shell.
 * Print the message separately and exit with main()'s status instead.
 */
$exit_status = main($argc, $argv);
print "Execution complete.\n";
exit($exit_status);
/**
 * Script driver: reads the command line, then parses and writes out
 * each input file in turn.
 *
 * A file that cannot be processed or written is skipped; the remaining
 * files are still attempted.
 *
 * @param int $argc the size of argv
 * @param mixed $argv hash containing cli args
 * @return integer EXIT_SUCCESS if at least one file was fully handled,
 * EXIT_FAILURE otherwise
 */
function main($argc, $argv)
{
    $out_dir = '';
    $in_files = '';
    parse_cli_arguments($argc, $argv, $out_dir, $in_files);

    $handled_count = 0;
    foreach ($in_files as $in_file) {
        print "Processing {$in_file}...\n";

        $parsed = array();
        if (process_file($in_file, $parsed) === EXIT_FAILURE) {
            print "Unable to process {$in_file}. \n\n";
            continue;
        }

        // only count (and announce) files whose results were written
        if (output_results($out_dir, $in_file, $parsed) !== EXIT_FAILURE) {
            ++$handled_count;
            print "{$in_file} successfully processed\n\n";
        }
    }

    return ($handled_count === 0) ? EXIT_FAILURE : EXIT_SUCCESS;
}
/**
 * Extracts the output directory and input file list from the cli
 * arguments, or prints the usage text and terminates the script when
 * fewer than three arguments were supplied.
 *
 * @param int $argc size of $argv
 * @param mixed $argv cli arguments (script name, output dir, inputs...)
 * @param string $out_dir reference, set to the second cli argument
 * @param array $in_files reference, set to all remaining cli arguments
 */
function parse_cli_arguments($argc, $argv, &$out_dir, &$in_files)
{
    $has_required_args = ($argc >= 3);
    if (!$has_required_args) {
        expected_usage();
        exit("Fatal error: invalid arguments.\n Aborting.\n");
    }

    // drop the script name, then peel the output directory off the front
    $positional = array_slice($argv, 1);
    $out_dir = array_shift($positional);
    $in_files = $positional;
}
/**
 * Writes the expected command-line invocation of this script to
 * stdout, framed by a blank line above and below.
 */
function expected_usage()
{
    $usage_lines = array(
        "\n",
        "Expected invocation of wsq2ser.php: \n",
        "php wsq2ser.php /output/dir /input/file1.wsq [/input/file2.wsq ...] \n",
        "\n"
    );

    foreach ($usage_lines as $usage_line) {
        print $usage_line;
    }
}
////////////////////////////
//// FILE PROCESSING
////////////////////////////

/**
 * Grab data from a file, process it by type per location and return
 * the raw results.
 *
 * The input file must adhere to the space-delimited FEQ format
 * this script was designed to handle. Every failure is reported on
 * stdout, and the file handle is closed on every path once it has
 * been opened.
 *
 * @param string $in_file location of input file
 * @param array $file_data reference to be filled by parsed file data:
 * one entry per title found in the file, each holding that title's
 * list of data points
 * @return integer EXIT_SUCCESS or EXIT_FAILURE
 */
function process_file($in_file, &$file_data)
{
    if (file_exists($in_file) === false) {
        print "Error: input file {$in_file} does not exist.\n";
        return EXIT_FAILURE;
    }

    if (($handle = fopen($in_file, "r")) === false) {
        print "Error: invalid file descriptor: " . $in_file . "\n";
        return EXIT_FAILURE;
    }

    $titles = array();
    if (get_titles($handle, $titles) === EXIT_FAILURE) {
        // release the handle before bailing out; it used to leak here
        fclose($handle);
        print "Error: {$in_file} is not of supported .wsq format.\n";
        return EXIT_FAILURE;
    }

    // initialize the container for processed data
    $file_data = array();
    foreach ($titles as $title) {
        $file_data[$title] = array();
    }

    $data_status = get_data($handle, $titles, $file_data);
    fclose($handle);

    if ($data_status === EXIT_FAILURE) {
        print "Error: unable to process data from {$in_file}. \n";
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
/**
 * Finds, splits, and returns an array containing the titles of
 * each gage location contained within this FEQ file.
 *
 * File layout as described by the original author:
 *  - line 1: data related to the simulation run; ignored
 *  - line 2: the model # (or, occasionally, an alias) for each site,
 *    separated by whitespace -- these become the titles
 *  - line 3: headings for the data columns that follow; ignored
 *
 * All three header lines must be present. On success the handle is
 * left positioned at the start of the fourth line, ready for
 * get_data().
 *
 * @param resource $handle reference the input file handler
 * @param hash $titles reference to be populated with the list of titles
 * @return integer EXIT_FAILURE if any of the three header lines is
 * missing or the title line holds no titles, EXIT_SUCCESS otherwise
 */
function get_titles(&$handle, &$titles)
{
    rewind($handle);

    $run_info_line = fgets($handle);
    $title_line = fgets($handle);
    $heading_line = fgets($handle);

    // fgets() yields false once the file is exhausted. Compare each
    // line strictly so that ANY missing header line is rejected (the
    // previous check only failed when lines 1 and 3 were both absent).
    if (($run_info_line === false) || ($title_line === false)
        || ($heading_line === false)) {
        return EXIT_FAILURE;
    }

    // -1 means "no limit"
    $title_delimiter = "/\s+/";
    $titles = preg_split($title_delimiter, $title_line, -1,
        PREG_SPLIT_NO_EMPTY);

    if (empty($titles)) {
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
/**
 * Reads the remaining lines of the input file and appends each line's
 * data points to the matching title in $file_data.
 *
 * The n-th data point produced by parse_line() is assigned to the n-th
 * title. Lines that parse_line() rejects are skipped. When a line
 * yields fewer data points than there are titles, the titles left over
 * receive nothing for that line (rather than a null placeholder).
 *
 * @param resource $handle data file resource by value, positioned at
 * the first data line
 * @param array $titles list of titles, in column order
 * @param hash $file_data reference container for file data, to be
 * populated with a hash for each location, containing an array of
 * data points (which are tuples/hashes themselves).
 * @return integer always EXIT_SUCCESS; returned explicitly so callers
 * comparing against EXIT_FAILURE receive a real status code
 */
function get_data($handle, $titles, &$file_data)
{
    while (($line_buffer = fgets($handle)) !== false) {
        $line_data = array();
        if (parse_line($line_buffer, $line_data) === EXIT_FAILURE) {
            continue;
        }

        foreach ($titles as $title) {
            // the line ran out of data points before we ran out of titles
            if (empty($line_data)) {
                break;
            }
            $file_data[$title][] = array_shift($line_data);
        }
    }

    return EXIT_SUCCESS;
}
/**
 * Parses one line of text from a given input file.
 *
 * The line is split on whitespace. compute_time() consumes the leading
 * date/time fields and yields the timestamp shared by every point on
 * the line. The remaining fields are read as consecutive
 * (discharge, stage) pairs, one pair per site, in file order; sites
 * are therefore associated implicitly by position. A trailing field
 * without a partner is ignored.
 *
 * @param string $line_buffer one line of file data
 * @param hash $line_data reference being populated with one
 * array("time", "discharge", "stage") triple per location
 * @return integer EXIT_FAILURE when the line is blank or its timestamp
 * cannot be computed, EXIT_SUCCESS otherwise
 */
function parse_line($line_buffer, &$line_data)
{
    // -1 means "no limit"
    $line_delimiter = "/\s+/";
    $split_line = preg_split($line_delimiter, $line_buffer,
        -1, PREG_SPLIT_NO_EMPTY);

    if (empty($split_line)) {
        return EXIT_FAILURE;
    }

    // omit point if it is invalid
    $data_timestamp = null;
    if (compute_time($split_line, $data_timestamp) === EXIT_FAILURE) {
        return EXIT_FAILURE;
    }

    $data_cols = count($split_line);
    $line_data = array();

    // step through complete pairs only, so $col + 1 is always defined
    for ($col = 0; $col + 1 < $data_cols; $col += 2) {
        $line_data[] = array(
            "time" => $data_timestamp,
            "discharge" => strval($split_line[$col]),
            "stage" => strval($split_line[$col + 1])
        );
    }

    return EXIT_SUCCESS;
}
/**
 * Generates a timestamp in milliseconds from the pre-split data line
 * of our target input file, and removes the consumed date/time fields
 * from the front of $split_line.
 *
 * Steps:
 *  - the four leading fields (year, month, day, fractional hour) are
 *    validated as numeric; anything else is rejected
 *  - the fractional part of the hour is converted to minutes and
 *    truncated to a whole minute
 *  - mktime() builds a timestamp from those fields in the script's
 *    default time zone
 *  - 60 seconds are added as a rounding correction for the truncation
 *  - the time zone's UTC offset in effect AT THE DATA'S TIME is added,
 *    in seconds, before the conversion to milliseconds
 *  - the result is multiplied by 1000 for interfacing with JavaScript
 *
 * NOTE(review): the rounding correction is applied unconditionally, so
 * a fraction that is already a whole number of minutes also gains one
 * minute. This is kept from the original; confirm against real FEQ
 * output.
 *
 * @param array $split_line freshly split line of data; note, each
 * FEQ file data line begins with data in the following format:
 * YYYY M[M] [D]D HH.HHHH [simulation data]
 *                   ^^^^ fractional hours
 * @param int &data_timestamp set upon proper parsing and handling of
 * the line data
 * @return integer EXIT_FAILURE when the date/time fields are missing,
 * non-numeric, or rejected by mktime(); EXIT_SUCCESS otherwise
 */
function compute_time(&$split_line, &$data_timestamp)
{
    // all four date/time fields must be present
    if (count($split_line) < 4) {
        return EXIT_FAILURE;
    }

    $year = array_shift($split_line);
    $month = array_shift($split_line);
    $day = array_shift($split_line);
    $fractional_hour = array_shift($split_line);

    // reject lines (stray headings, garbage) whose date is not numeric
    $whole_number = '/^\d+$/';
    if (!preg_match($whole_number, $year)
        || !preg_match($whole_number, $month)
        || !preg_match($whole_number, $day)
        || !preg_match('/^(\d*)(?:\.(\d*))?$/', $fractional_hour, $hour_parts)) {
        return EXIT_FAILURE;
    }

    $hour = $hour_parts[1];
    $fraction_digits = isset($hour_parts[2]) ? $hour_parts[2] : '';
    if (($hour === '') && ($fraction_digits === '')) {
        return EXIT_FAILURE;
    }

    // convert from fractional hour to whole minutes; scale by the
    // actual number of fractional digits instead of assuming four
    $minutes = 0;
    if ($fraction_digits !== '') {
        $minutes = (int) $fraction_digits * 60
            / pow(10, strlen($fraction_digits));
    }
    $seconds = 0;

    $data_timestamp = mktime((int) $hour, (int) $minutes,
        (int) $seconds, (int) $month, (int) $day, (int) $year);

    // mktime() signals failure with false (or -1 on old PHP versions)
    if (($data_timestamp === false) || ($data_timestamp === -1)) {
        return EXIT_FAILURE;
    }

    $rounding_correction = 60;
    $milliseconds_per_second = 1000;

    // date('Z') is an offset in SECONDS, so it has to be applied before
    // the conversion to milliseconds. It is evaluated at the data's own
    // timestamp so daylight saving time is handled per data point.
    $timezone_offset = (int) date('Z', $data_timestamp);

    $data_timestamp = ($data_timestamp + $rounding_correction
        + $timezone_offset) * $milliseconds_per_second;

    return EXIT_SUCCESS;
}
////////////////////////////
/// DATA CONVERSION AND OUTPUT
////////////////////////////

/**
 * Prepares data sets to be rearranged, then serializes them
 * and dispatches them to be written to file.
 *
 * Two files are written per gage:
 *   <out_dir>/<input name>_<gage>_discharge.ser
 *   <out_dir>/<input name>_<gage>_stage.ser
 * where <input name> is the input file's base name without ".wsq".
 * A directory separator is inserted between the directory and the
 * file name when $out_dir does not already end in one. Every gage is
 * attempted even if an earlier write failed.
 *
 * @param string $out_dir directory where output files are written
 * @param string $in_file name of the file we have been working with
 * @param file_data hash containing both discharge and stage data,
 * across all locations enumerated within $in_file
 * @return integer EXIT_FAILURE if any output file could not be
 * written, EXIT_SUCCESS otherwise
 */
function output_results($out_dir, $in_file, $file_data)
{
    $in_file = basename($in_file, ".wsq");

    // "/output/dir" and "/output/dir/" must both land inside the
    // directory; an empty $out_dir keeps meaning "current directory"
    if ($out_dir !== '') {
        $out_dir = rtrim($out_dir, '/' . DIRECTORY_SEPARATOR)
            . DIRECTORY_SEPARATOR;
    }

    $status = EXIT_SUCCESS;

    foreach ($file_data as $gage_name => $site_data) {
        $plot_discharge = array("series" => array());
        $plot_stage = array("series" => array());

        // format_data() replaces both arrays with serialized strings
        format_data($site_data, $in_file, $plot_discharge, $plot_stage);

        // generate output file names
        $out_dis = "{$out_dir}{$in_file}_{$gage_name}_discharge.ser";
        $out_stage = "{$out_dir}{$in_file}_{$gage_name}_stage.ser";

        // write serialized data to file, remembering any failure
        if (swrite($out_dis, $plot_discharge) === EXIT_FAILURE) {
            $status = EXIT_FAILURE;
        }
        if (swrite($out_stage, $plot_stage) === EXIT_FAILURE) {
            $status = EXIT_FAILURE;
        }
    }

    return $status;
}
/**
 * Rearranges one location's data points into two Highcharts-style
 * series -- time vs. discharge and time vs. stage -- and then replaces
 * both output arguments with their serialized form.
 *
 * Points are appended under the keys "discharge_<in_file>" and
 * "elevation_<in_file>" inside each argument's "series" entry, as
 * array("x" => time, "y" => value-as-string).
 *
 * @param hash $data the processed file data for a single location
 * @param string $in_file input file's name for indexing
 * @param hash $plot_discharge array to be filled with discharge data;
 * holds a serialized string on return
 * @param hash $plot_stage array to be filled with stage data; holds a
 * serialized string on return
 */
function format_data($data, $in_file, &$plot_discharge, &$plot_stage)
{
    $discharge_key = "discharge_" . $in_file;
    $stage_key = "elevation_" . $in_file;

    foreach ($data as $point) {
        $x = $point["time"];

        $plot_discharge["series"][$discharge_key][] = array(
            "x" => $x,
            "y" => strval($point["discharge"])
        );
        $plot_stage["series"][$stage_key][] = array(
            "x" => $x,
            "y" => strval($point["stage"])
        );
    }

    $plot_discharge = serialize($plot_discharge);
    $plot_stage = serialize($plot_stage);
}
/**
 * Wrapper for safely writing to a file. Includes error reporting
 * in the event of any issues.
 *
 * The file is opened in 'w' mode, so existing content is discarded.
 * The handle is closed whether or not the write succeeds.
 *
 * @param string $loc the file location to write to
 * @param string $data expecting just one string to be written
 * @return integer EXIT_FAILURE if the file cannot be opened or the
 * data cannot be written completely, EXIT_SUCCESS otherwise
 */
function swrite($loc, $data)
{
    $handle = fopen($loc, 'w');

    // test the handle, not the path: fopen() reports failure via its
    // return value, and $loc is never false
    if ($handle === false) {
        print "Error: unable to open {$loc} for writing\n";
        return EXIT_FAILURE;
    }

    $bytes_written = fwrite($handle, $data);
    fclose($handle);

    // a short write is treated as a failure as well
    if (($bytes_written === false) || ($bytes_written < strlen($data))) {
        print "Error: unable to write results to {$loc}\n";
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Add Comment
Please, Sign In to add comment