Guest User

Untitled

a guest
Jan 12th, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 12.19 KB | None | 0 0
  1. #!/usr/bin/php
  2. <?php
  3. /**
  4.  * WSQ format to parsed, serialized PHP hash utility
  5.  *
  6.  * Script for preprocessing FEQ simulation output files, of *.wsq
  7.  * format, for use within Charts. Completely parses input files
  8.  * and generates files with serialized PHP arrays of correct format
  9.  * for use with the HighCharts javascript library.
  10.  *
  11.  * @see expected_usage() for... expected... usage...
  12.  *
  13.  * @author  Paul Lambert <pmlambert @ usgs.gov>
  14.  * @package charts
  15.  * @note    written for PHP 4.3.9
  16.  */
  17.  
  18.  
  19. ////////////////////////////
  20. //// SETUP
  21. ////////////////////////////
  22.  
  23. /**
  24.  * C-style macro for handling function return values. Desirable
  25.  * due to PHP < 5's lack of exceptions.
  26.  *
  27.  * @var int EXIT_SUCCESS
  28.  * @var int EXIT_FAILURE
  29.  */
  30. define("EXIT_SUCCESS", 0);
  31. define("EXIT_FAILURE", 1);
  32.  
  33. // convenient for development environment
  34. // XXX set to false for server
  35. define("DEBUG", true);
  36. if(DEBUG)
  37. {
  38.   error_reporting(E_ALL|E_WARNING);
  39.   ini_set('display_errors', 1);
  40. }
  41.  
  42. // let php declare argc, argv
  43. ini_set("register_argc_argv", "1");
  44.  
  45. // let php have unlimited memory and execution time --
  46. // this script should work flawlessly with any number of input
  47. // files. (historical data, for example)
  48. ini_set("memory_limit", "-1");
  49. ini_set("max_execution_time", "0");
  50.  
  51.  
  52. /** Initialization point */
  53. main($argc, $argv);
  54. exit("Execution complete.\n");
  55.  
  56. /**
  57.  * Handles initialization of script and acts as a canonical home base
  58.  *
  59.  * @param int   $argc the size of argv
  60.  * @param mixed $argv hash containing cli args
  61.  * @return integer success code
  62.  */
  63. function main($argc, $argv)
  64. {
  65.   $out_dir = $in_files = '';
  66.   $num_files_parsed = 0;
  67.  
  68.   parse_cli_arguments($argc, $argv, $out_dir, $in_files);
  69.  
  70.   foreach($in_files as $in_file)
  71.   {
  72.     print "Processing {$in_file}...\n";
  73.  
  74.     $file_data = array();
  75.     if(process_file($in_file, $file_data) === EXIT_FAILURE)
  76.     {
  77.       print "Unable to process {$in_file}. \n\n";
  78.       continue;
  79.     }
  80.  
  81.     if(output_results($out_dir, $in_file, $file_data) === EXIT_FAILURE)
  82.     {
  83.       continue;
  84.     }
  85.  
  86.     ++$num_files_parsed;
  87.     print "{$in_file} successfully processed\n\n";
  88.   }
  89.  
  90.   if($num_files_parsed === 0)
  91.   {
  92.     return EXIT_FAILURE;
  93.   }
  94.  
  95.   return EXIT_SUCCESS;
  96. }
  97.  
  98. /**
  99.  * Handles retrieving useful info out of cli args (or exits)
  100.  *
  101.  * @param int $argc size of $argv
  102.  * @param mixed $argv cli arguments
  103.  * @param string $out_dir reference to be populated if valid args
  104.  * @param array $in_files array reference to be populated with
  105.  *   input file paths if valid args
  106.  */
  107. function parse_cli_arguments($argc, $argv, &$out_dir, &$in_files)
  108. {
  109.   if($argc < 3)
  110.   {
  111.     expected_usage();
  112.     exit("Fatal error: invalid arguments.\n Aborting.\n");
  113.   }
  114.  
  115.   $script_name = array_shift($argv);
  116.   $out_dir = array_shift($argv);
  117.   $in_files = $argv;
  118. }
  119.  
  120. /**
  121.  * Prints expected usage of script to stdout
  122.  */
  123. function expected_usage()
  124. {
  125.   print "\n";
  126.   print "Expected invocation of wsq2ser.php: \n";
  127.   print "php wsq2ser.php /output/dir /input/file1.wsq [/input/file2.wsq ...] \n";
  128.   print "\n";
  129. }
  130.  
  131. ////////////////////////////
  132. //// FILE PROCESSING
  133. ////////////////////////////
  134.  
  135. /**
  136.  * Grab data from a file, process it by type per location and return
  137.  * the raw results.
  138.  *
  139.  * The input file must adhere to the space-delimited FEQ format
  140.  * this script was designed to handle.
  141.  * @param string $in_file location of input file
  142.  * @param array $file_data reference to be filled by parsed file data
  143.  * @return integer
  144.  */
  145. function process_file($in_file, &$file_data)
  146. {
  147.   if(file_exists($in_file) === false)
  148.   {
  149.     print "Error: input file {$in_file} does not exist.\n";
  150.     return EXIT_FAILURE;
  151.   }
  152.  
  153.   if(($handle = fopen($in_file, "r")) === false)
  154.   {
  155.     print "Error: invalid file descriptor: " . $in_file . "\n";
  156.     return EXIT_FAILURE;
  157.   }
  158.  
  159.   if(get_titles($handle, $titles) === EXIT_FAILURE)
  160.   {
  161.     print "Error: {$in_file} is not of supported .wsq format.\n";
  162.     return EXIT_FAILURE;
  163.   }
  164.  
  165.   // initialize the container for processed data
  166.   $file_data = array();
  167.   foreach($titles as $title)
  168.   {
  169.     $file_data[$title] = array();
  170.   }
  171.  
  172.   if(get_data($handle, $titles, $file_data) === EXIT_FAILURE)
  173.   {
  174.     fclose($handle);
  175.     print "Error: unable to process data from {$in_file}. \n";
  176.     return EXIT_FAILURE;
  177.   }
  178.  
  179.   fclose($handle);
  180.   return EXIT_SUCCESS;
  181. }
  182.  
  183. /**
  184.  * Finds, splits, and returns an array containing the titles of
  185.  * each gage location contained within this FEQ file.
  186.  *
  187.  * To understand this function, it's important to first understand
  188.  * the structure of a FEQ output file. The first line in the file
  189.  * contains garbage data related to the simulation run;
  190.  * it can be safely ignored.
  191.  *
  192.  * The second line in each .wsq file contains the model # (or,
  193.  * occasionally, an alias) for each site
  194.  *
  195.  * The third line contains headings for the following data columns.
  196.  * Assuming the author knows the file structure ahead of time,
  197.  * this line can be safely ignored
  198.  *
  199.  * @param resource $handle reference the input file handler
  200.  * @param hash $titles reference to be populated
  201.  * @return integer
  202.  */
  203. function get_titles(&$handle, &$titles)
  204. {
  205.   rewind($handle);
  206.  
  207.   $line_0 = (bool)fgets($handle);
  208.   $title_line = fgets($handle);
  209.   $line_2 = (bool)fgets($handle);
  210.  
  211.   if(($title_line === false) || (($line_0 || $line_2) === false))
  212.   {
  213.     return EXIT_FAILURE;
  214.   }
  215.  
  216.   $title_delimiter = "/\s+/";
  217.   $titles = preg_split($title_delimiter, $title_line, null,
  218.     PREG_SPLIT_NO_EMPTY);
  219.  
  220.   if(empty($titles))
  221.   {
  222.     return EXIT_FAILURE;
  223.   }
  224.  
  225.   return EXIT_SUCCESS;
  226. }
  227.  
  228. /**
  229.  * Reads and parses the input file. Data is properly associated
  230.  * in $file_data for later processing.
  231.  *
  232.  * @param resource $handle data file resource by value
  233.  * @param array $titles
  234.  * @param hash $file_data reference container for file data, to be
  235.  *  populated with a hash for each location, containing an array of
  236.  *  data points (which are tuples/hashes themselves).
  237.  */
  238. function get_data($handle, $titles, &$file_data)
  239. {
  240.   while(($line_buffer = fgets($handle)) !== false)
  241.   {
  242.     if(parse_line($line_buffer, $line_data) === EXIT_FAILURE)
  243.     {
  244.       continue;
  245.     }
  246.  
  247.     foreach($titles as $title)
  248.     {
  249.       $file_data[$title][] = array_shift($line_data);
  250.     }
  251.   }
  252. }
  253.  
  254. /**
  255.   Split each line by the spaces which delimit it,
  256.   grab relevant data, associate by site implicitly
  257.   (we can do this by knowing file structure ahead of time)
  258.   and return a hash containing each site's data as a separate
  259.   "bundle" (hash containing time, discharge, stage)
  260.  
  261.   @param line_buffer a line of text from input file
  262.   @return line_data a hash containing a "bundle" for each site
  263.       within the file, linking together time/discharge/stage
  264. **/
  265.  
  266. /**
  267.  * Parses a line of text at a time from a given input file.
  268.  *
  269.  * Note that the data fields are space delimited.
  270.  * @param string $line_buffer one line of file data
  271.  * @param hash $line_data reference being populated with many data
  272.  *  triples per location.
  273.  * @return integer
  274.  */
  275. function parse_line($line_buffer, &$line_data)
  276. {
  277.   $line_delimiter = "/\s+/";
  278.   $split_line = preg_split($line_delimiter, $line_buffer,
  279.       null, PREG_SPLIT_NO_EMPTY);
  280.  
  281.   if(empty($split_line))
  282.   {
  283.     return EXIT_FAILURE;
  284.   }
  285.  
  286.   // omit point if it is invalid
  287.   if(compute_time($split_line, $data_timestamp) === EXIT_FAILURE)
  288.   {
  289.     return EXIT_FAILURE;
  290.   }
  291.  
  292.   $data_cols = count($split_line);
  293.   $line_data = array();
  294.  
  295.   for($col=0; $col<$data_cols; $col+=2)
  296.   {
  297.     $discharge = strval($split_line[$col]);
  298.     $stage = strval($split_line[$col+1]);
  299.  
  300.     $data_point = array(
  301.         "time" => $data_timestamp,
  302.         "discharge" => $discharge,
  303.         "stage" => $stage);
  304.  
  305.     $line_data[] = $data_point;
  306.   }
  307.  
  308.   return EXIT_SUCCESS;
  309. }
  310.  
  311. /**
  312.  * Generates a UNIX timestamp in milliseconds from the pre-split
  313.  * data line in our target input file
  314.  *
  315.  * Notice that there are quite a few corrections to be made here:
  316.  * including time zone correction, conversion from seconds to
  317.  * milliseconds for interfacing with JavaScript, converting
  318.  * fractional hours to minutes, ...
  319.  *
  320.  * @param array $split_line freshly split line of data; note, each
  321.  *  FEQ file data line begins with data in the following format:
  322.  *  YYYY M[M] [D]D HH.HHHH   [simulation data]
  323.  *                    ^^^^ fractional hours
  324.  * @param int &data_timestamp should be set upon proper parsing and
  325.  *  handling of the line data
  326.  * @return integer
  327.  */
  328. function compute_time(&$split_line, &$data_timestamp)
  329. {
  330.   $year   = array_shift($split_line);
  331.   $month  = array_shift($split_line);
  332.   $day    = array_shift($split_line);
  333.  
  334.   // convert from fractional hour to whole minutes
  335.   list($hour, $minutes) = explode(".", array_shift($split_line));
  336.   $minutes = $minutes * 6 / 1000;
  337.   $seconds = 0;
  338.  
  339.   $data_timestamp = (mktime((int) $hour, (int) $minutes,
  340.     (int) $seconds, (int) $month, (int) $day, (int) $year));
  341.  
  342.   if(($data_timestamp === false) || ($data_timestamp === -1))
  343.   {
  344.     return EXIT_FAILURE;
  345.   }
  346. $rounding_correction = 60;
  347.   $milliseconds_per_second = 1000;
  348.   $central_timezone_offset = date('Z', time());
  349.  
  350.   $data_timestamp = $data_timestamp + $rounding_correction;
  351.   $data_timestamp = $data_timestamp * $milliseconds_per_second;
  352.   $data_timestamp = $data_timestamp + $central_timezone_offset;
  353.  
  354.   return EXIT_SUCCESS;
  355. }
  356.  
  357.  
  358. ////////////////////////////
  359. /// DATA CONVERSION AND OUTPUT
  360. ////////////////////////////
  361.  
  362. /**
  363.   * Prepares data sets to be rearranged, then serializes them
  364.   * and dispatches them to be written to file.
  365.   *
  366.   * @param string $out_dir directory where output files are written
  367.   * @param string $in_file name of the file we have been working with
  368.   * @param file_data hash containing both discharge and stage data,
  369.   *   across all locations enumerated within $in_file
  370.   * @return integer
  371.   */
  372. function output_results($out_dir, $in_file, $file_data)
  373. {
  374.   $in_file = basename($in_file, ".wsq");
  375.  
  376.   foreach($file_data as $gage_name => $site_data)
  377.   {
  378.     unset($plot_discharge, $plot_stage);
  379.     $plot_discharge["series"] = array();
  380.     $plot_stage["series"] = array();
  381.  
  382.     format_data($site_data, $in_file, $plot_discharge, $plot_stage);
  383.    
  384.     // generate output file names
  385.     $out_dis = "{$out_dir}{$in_file}_{$gage_name}_discharge.ser";
  386.     $out_stage = "{$out_dir}{$in_file}_{$gage_name}_stage.ser";
  387.  
  388.     // write serialized data to file
  389.     swrite($out_dis, $plot_discharge);
  390.     swrite($out_stage, $plot_stage);
  391.   }
  392.  
  393.   return EXIT_SUCCESS;
  394. }
  395.  
  396. /**
  397.  * Puts data into Highcharts-compatible format; populates two hashes,
  398.  * linking together time & (stage | discharge).
  399.  *
  400.  * @param hash $data the processed file data
  401.  * @param string $in_file input file's name for indexing
  402.  * @param hash $plot_discharge array to be filled with discharge data
  403.  * @param hash $plot_stage array to be filled with stage data
  404.  */
  405. function format_data($data, $in_file, &$plot_discharge, &$plot_stage)
  406. {
  407.   $index_discharge  = "discharge_" . $in_file;
  408.   $index_stage      = "elevation_" . $in_file;
  409.  
  410.   foreach($data as $data_point)
  411.   {
  412.     $timestamp = $data_point["time"];
  413.     $discharge = $data_point["discharge"];
  414.     $stage     = $data_point["stage"];
  415.  
  416.     $plot_discharge["series"][$index_discharge][] =
  417.       array("x" => $timestamp, "y" => strval($discharge));
  418.  
  419.     $plot_stage["series"][$index_stage][] =
  420.       array("x" => $timestamp, "y" => strval($stage));
  421.   }
  422.  
  423.   $plot_discharge = serialize($plot_discharge);
  424.   $plot_stage = serialize($plot_stage);
  425. }
  426.  
  427.  
  428. /**
  429.   * Wrapper for safely writing to a file. Includes error reporting
  430.   * in the event of any issues.
  431.   *
  432.   * @param string $loc the file location to write to
  433.   * @param string $data expecting just one string to be written
  434.   * @return integer
  435.   */
  436. function swrite($loc, $data)
  437. {
  438.   $handle = fopen($loc, 'w');
  439.   if($loc === false)
  440.   {
  441.     print "Error: unable to open {$loc} for writing\n";
  442.     return EXIT_FAILURE;
  443.   }
  444.  
  445.   fseek($handle, 0);
  446.  
  447.   if(fwrite($handle, $data) === false)
  448.   {
  449.     print "Error: unable to write results to {$loc}\n";
  450.     return EXIT_FAILURE;
  451.   }
  452.  
  453.   fclose($handle);
  454.   return EXIT_SUCCESS;
  455. }
Add Comment
Please, Sign In to add comment