SHARE
TWEET

shrinkray-redacted.php

a guest Apr 3rd, 2015 10 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2.  
  3. /*
  4. date: 2013-dec/2014-jan
  5. beasmaster: REDACTED
  6. help: REDACTED
  7. svn: http://REDACTED/hrinkray/?root=svn
  8. readme: http://REDACTED/README.md?root=svn&view=log
  9. overview: given a file, try progressive steps to reduce the information
  10. */
  11.  
  // derived from the request environment; guarded like the other inputs so
  // that a missing REQUEST_METHOD yields null instead of an undefined-index
  // notice
  $how = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : null;

  // request parameters, stepped in as needed; each is null when not supplied
  $ax = isset($_REQUEST['ax']) ? $_REQUEST['ax'] : null;             // action selector for the dispatch switch
  $mode = isset($_REQUEST['mode']) ? $_REQUEST['mode'] : null;
  $srpath = isset($_REQUEST['srpath']) ? $_REQUEST['srpath'] : null; // path of the document to shrink
  $max = isset($_REQUEST['max']) ? $_REQUEST['max'] : null;

  // target size in bytes (7 * 1024 * 1024)
  $shrinkray_max_size = 7340032;

  // response codes
  // Each value is a complete header line handed to header(). The 2xx codes
  // from 230 to 250 are service-specific: one per shrinking step, so the
  // status tells the client which step produced the body.
  $shrinkray_status_ok = 'HTTP/1.1 200 OK';
  $shrinkray_status_shrunk_tags = 'HTTP/1.1 230 Shrunk via tags';
  $shrinkray_status_shrunk_dedup = 'HTTP/1.1 235 Shrunk via duplication';
  $shrinkray_status_shrunk_punctuation = 'HTTP/1.1 239 Shrunk via punctuation';
  $shrinkray_status_shrunk_numbers = 'HTTP/1.1 240 Shrunk via numbers';
  $shrinkray_status_shrunk_lowercase = 'HTTP/1.1 245 Shrunk via lowercase';
  $shrinkray_status_shrunk_header = 'HTTP/1.1 250 Shrunk via header';
  $shrinkray_status_error = 'HTTP/1.1 400 Bad Request';
  $shrinkray_status_gone = 'HTTP/1.1 410 Gone';
  $shrinkray_status_precondition_failed = 'HTTP/1.1 412 Precondition Failed';
  $shrinkray_status_too_large = 'HTTP/1.1 413 Request Entity Too Large';
  $shrinkray_status_internal_error = 'HTTP/1.1 500 Internal Server Error';
  // not a status line: a Location header used to redirect unknown actions
  $shrinkray_status_redirect = 'Location: /shrinkray/';
  39.  
  /**
   * Reduce a space-separated string to its distinct tokens.
   *
   * The string is split on single spaces, so consecutive spaces produce
   * empty tokens, which are de-duplicated like any other token. Each distinct
   * token is kept once, at the position of its first occurrence.
   *
   * @param string $input_string text whose tokens are separated by " "
   * @return string the unique tokens, re-joined with single spaces
   */
  function shrinkray_dedup($input_string) {
      // Flipping makes every token an array key; keys are unique and keep the
      // position of their first insertion. This avoids array_unique().
      $seen_tokens = array_flip(explode(" ", $input_string));

      return implode(" ", array_keys($seen_tokens));
  }
  48.  
  /**
   * Deliver a candidate string if it fits the size limit, otherwise do nothing.
   *
   * When the candidate fits, the status header is sent, the string is printed
   * and the script terminates; this function then never returns. When it is
   * still too large the function returns normally so the caller can try the
   * next shrinking step.
   *
   * @param string $input_string    candidate output
   * @param string $response_status header line to send when the candidate fits
   * @return void
   */
  function shrinkray_eval($input_string, $response_status) {
      global $shrinkray_max_size;

      // A candidate of exactly the maximum size is acceptable: the dispatcher
      // only starts shrinking documents that are strictly larger than the limit.
      if (strlen($input_string) > $shrinkray_max_size) {
          return;
      }

      // Set HTTP headers before delivering data. The message argument is passed
      // explicitly because shrinkray_response() declares it as required; the
      // body is printed here instead.
      shrinkray_response($response_status, null);
      print $input_string;
      exit;
  }
  58.  
  59.  
  /**
   * Send one raw header line, optionally followed by a body message and exit.
   *
   * @param string      $shrinkray_status  complete header line passed to header()
   * @param string|null $shrinkray_message text echoed after the header; skipped
   *                                       when empty. Optional, so callers that
   *                                       only need the header may omit it.
   * @param mixed       $shrinkray_exit    value passed to exit(). The script
   *                                       terminates only when this is
   *                                       non-empty, so NULL and 0 both return
   *                                       control to the caller.
   * @return void
   */
  function shrinkray_response($shrinkray_status, $shrinkray_message = NULL, $shrinkray_exit = NULL) {
      header($shrinkray_status);

      if (!empty($shrinkray_message)) {
          echo $shrinkray_message;
      }

      if (!empty($shrinkray_exit)) {
          exit($shrinkray_exit);
      }
  }
  70.  
  /**
   * Replace every character matching a Unicode property class with a space.
   *
   * The pattern is first applied in UTF-8 mode so that multi-byte characters
   * are matched as whole code points. Without the "u" modifier PCRE treats each
   * byte as a Latin-1 character, so bytes inside UTF-8 sequences can be
   * classified as punctuation, symbols or digits and blanked out, which
   * corrupts non-ASCII text. If the subject is not valid UTF-8 the UTF-8
   * attempt returns null and the byte-mode pattern is used instead.
   *
   * @param string $char_class pattern body without delimiters, e.g. '\p{N}'
   * @param string $subject    text to process
   * @return string processed text; the unchanged subject if both attempts fail
   */
  function shrinkray_blank_class($char_class, $subject) {
      $result = preg_replace('/' . $char_class . '/u', ' ', $subject);
      if ($result === null) {
          $result = preg_replace('/' . $char_class . '/', ' ', $subject);
      }
      // never hand null onwards: it would measure as an empty string and be
      // delivered as a successfully shrunk document
      return ($result === null) ? $subject : $result;
  }

  // Dispatch on the requested action.
  switch ($ax) {
      case "help":
          print "This is a webservice for shrinking documents.  It accepts the ax of help, lim, uq, ht, sr.  Find out more http://REDACTED/docs/README.md?root=svn&view=log";
          exit;

      case "lim":
          // report the size limit
          print $shrinkray_max_size;
          exit;

      case "sr":
          if (empty($srpath)) {
              // the non-empty exit value makes shrinkray_response() terminate
              shrinkray_response($shrinkray_status_error, "did not recieve GET with srpath", 1);
              exit;
          }

          // NOTE(review): $srpath comes straight from the request and is used
          // as a filesystem path, so a client can read any file the PHP process
          // can read. Confirm that access to this service is restricted, or
          // confine the path to a known document root.
          //
          // is_file() rejects directories and other non-regular files, which
          // is_readable() alone would accept.
          if (!is_file($srpath) || !is_readable($srpath)) {
              shrinkray_response($shrinkray_status_gone, "document does not exist", 0);
              exit;
          }

          $source_string = file_get_contents($srpath, true);
          if ($source_string === false) {
              // a failed read must not be mistaken for an empty document
              shrinkray_response($shrinkray_status_gone, "document does not exist", 0);
              exit;
          }

          if (strlen($source_string) <= $shrinkray_max_size) {
              shrinkray_response($shrinkray_status_precondition_failed, "document does not need shrinking", 0);
              break;
          }

          // Each shrinkray_eval() call below prints its candidate and ends the
          // script as soon as one fits, so later steps only run when every
          // earlier one was still too large.

          // step - strip HTML
          $working_string = strip_tags($source_string);
          shrinkray_eval($working_string, $shrinkray_status_shrunk_tags);

          // step - remove duplicate tokens
          shrinkray_eval(shrinkray_dedup($working_string), $shrinkray_status_shrunk_dedup);

          // step - strip punctuation
          // reference: http://us1.php.net/manual/en/regexp.reference.unicode.php
          // \p{P}        punctuation class
          // \p{S}        symbol class
          // \p{Zp}       paragraph separator class
          // was $working_string=preg_replace("/[^a-zA-Z0-9\s]+/", " ", $working_string);
          $working_string = shrinkray_blank_class('[\p{P}\p{S}\p{Zp}]', $working_string);
          $working_string = preg_replace('/\s\s+/', " ", $working_string);
          shrinkray_eval(shrinkray_dedup($working_string), $shrinkray_status_shrunk_punctuation);

          // step - strip numbers (\p{N} is the number class)
          $working_string = shrinkray_blank_class('\p{N}', $working_string);
          $working_string = preg_replace('/\s\s+/', " ", $working_string);
          shrinkray_eval(shrinkray_dedup($working_string), $shrinkray_status_shrunk_numbers);

          // step - lower case
          shrinkray_eval(shrinkray_dedup(strtolower($working_string)), $shrinkray_status_shrunk_lowercase);

          // step - head, switch back to source string, remove HTML tags again
          // and keep only the leading part, leaving room for the "..." marker
          $working_string = substr(strip_tags($source_string), 0, ($shrinkray_max_size - 100));
          $working_string = preg_replace('/\s\s+/', " ", $working_string);
          shrinkray_eval($working_string . '...', $shrinkray_status_shrunk_header);

          // Ugh, it didn't work and really there's not a lot of ways to get to this step
          shrinkray_response($shrinkray_status_too_large, "tried all the tricks, still too large", 1);
          break;

      default:
          // unknown or missing action: send the redirect header
          shrinkray_response($shrinkray_status_redirect, "bye bye", 0);
          break;
  }
  133. ?>
RAW Paste Data
Top