Guest User

shrinkray-redacted.php

a guest
Apr 3rd, 2015
38
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2.  
  3. /*
  4. date: 2013-dec/2014-jan
  5. beasmaster: REDACTED
  6. help: REDACTED
  7. svn: http://REDACTED/hrinkray/?root=svn
  8. readme: http://REDACTED/README.md?root=svn&view=log
  9. overview: given a file, try progressive steps to reduce the information
  10. */
  11.  
  // HTTP method of the current request; null when the SAPI does not provide
  // one (e.g. when the script is run from the command line).
  $how = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : null;

  // Request parameters; each one is null when the client did not send it.
  $ax = isset($_REQUEST['ax']) ? $_REQUEST['ax'] : null;             // action selector used by the dispatcher
  $mode = isset($_REQUEST['mode']) ? $_REQUEST['mode'] : null;
  $srpath = isset($_REQUEST['srpath']) ? $_REQUEST['srpath'] : null; // path of the document to shrink
  $max = isset($_REQUEST['max']) ? $_REQUEST['max'] : null;

  // Target size in bytes (7 MiB = 7340032).
  $shrinkray_max_size = 7 * 1024 * 1024;

  // Response status lines, passed verbatim to header().
  // The 230-250 codes are service-specific: each one names the shrinking step
  // that finally got the document under the target size.
  $shrinkray_status_ok = 'HTTP/1.1 200 OK';
  $shrinkray_status_shrunk_tags = 'HTTP/1.1 230 Shrunk via tags';
  $shrinkray_status_shrunk_dedup = 'HTTP/1.1 235 Shrunk via duplication'; // reason phrase was misspelled "Shurnk"
  $shrinkray_status_shrunk_punctuation = 'HTTP/1.1 239 Shrunk via punctuation';
  $shrinkray_status_shrunk_numbers = 'HTTP/1.1 240 Shrunk via numbers';
  $shrinkray_status_shrunk_lowercase = 'HTTP/1.1 245 Shrunk via lowercase';
  $shrinkray_status_shrunk_header = 'HTTP/1.1 250 Shrunk via header';
  $shrinkray_status_error = 'HTTP/1.1 400 Bad Request';
  $shrinkray_status_gone = 'HTTP/1.1 410 Gone';
  $shrinkray_status_precondition_failed = 'HTTP/1.1 412 Precondition Failed';
  $shrinkray_status_too_large = 'HTTP/1.1 413 Request Entity Too Large';
  $shrinkray_status_internal_error = 'HTTP/1.1 500 Internal Server Error';
  // Not a status line: a Location header used to bounce unknown actions.
  $shrinkray_status_redirect = 'Location: /shrinkray/';
  39.  
  /**
   * Remove repeated space-separated tokens from a string.
   *
   * The input is split on single space characters, each distinct token is
   * kept once (in order of first appearance), and the survivors are joined
   * back together with single spaces.
   *
   * @param string $input_string text whose tokens are separated by " "
   * @return string the text with duplicate tokens removed
   */
  function shrinkray_dedup($input_string) {
      // Flipping turns every token into an array key, and keys are unique,
      // so reading the keys back yields each token exactly once.
      $seen = array_flip(explode(" ", $input_string));
      return implode(" ", array_keys($seen));
  }
  48.  
  /**
   * Deliver a candidate string if it fits within the size limit.
   *
   * When the candidate is at most $shrinkray_max_size bytes long, the given
   * status line is sent, the candidate is printed and the script terminates.
   * Otherwise the function simply returns so the caller can try its next
   * shrinking step.
   *
   * @param string $input_string    candidate output
   * @param string $response_status raw header line handed to shrinkray_response()
   * @return void
   */
  function shrinkray_eval($input_string, $response_status) {
      global $shrinkray_max_size;

      // The dispatcher only starts shrinking when the source is strictly larger
      // than the limit, so a candidate of exactly the limit is acceptable too.
      if (strlen($input_string) > $shrinkray_max_size) {
          return;
      }

      // Headers must go out before any body data. The message argument is
      // passed explicitly: omitting a required argument is a fatal
      // ArgumentCountError on PHP 7.1 and later.
      shrinkray_response($response_status, '');
      print $input_string;
      exit;
  }
  58.  
  59.  
  /**
   * Send a raw header line and, optionally, a short body and/or terminate.
   *
   * @param string          $shrinkray_status  header line passed verbatim to header()
   * @param string|null     $shrinkray_message body text to echo; skipped when empty()
   *                                           (which includes null, '' and '0')
   * @param int|string|null $shrinkray_exit    value handed to exit(); the script is
   *                                           only terminated when this is non-empty,
   *                                           so 0 and null both let execution continue
   * @return void
   */
  function shrinkray_response($shrinkray_status, $shrinkray_message = NULL, $shrinkray_exit = NULL) {
      // $shrinkray_message defaults to NULL so that status-only calls (as made by
      // shrinkray_eval) do not fail with a missing-argument error.
      header($shrinkray_status);
      if (!empty($shrinkray_message)) {
          echo $shrinkray_message;
      }
      if (!empty($shrinkray_exit)) {
          exit($shrinkray_exit);
      }
  }
  70.  
  // Dispatch on the requested action. The help text also advertises "uq" and
  // "ht", but there is no case for them here, so they end up in the default
  // branch like any other unknown action.
  switch ($ax) {
      case "help":
          print "This is a webservice for shrinking documents.  It accepts the ax of help, lim, uq, ht, sr.  Find out more http://REDACTED/docs/README.md?root=svn&view=log";
          exit;

      case "lim":
          // report the target size in bytes
          print $shrinkray_max_size;
          exit;

      case "sr":
          if (empty($srpath)) {
              shrinkray_response($shrinkray_status_error, "did not recieve GET with srpath", 1);
              exit;
          }

          // NOTE(review): $srpath comes straight from the request and is used as a
          // filesystem path without restriction, so any file readable by the PHP
          // process can be requested. Consider confining it to a known document
          // root (realpath() + prefix check) once that root is decided.
          //
          // is_file() rejects directories, which pass is_readable() but cannot be
          // read as a document.
          if (!is_file($srpath) || !is_readable($srpath)) {
              shrinkray_response($shrinkray_status_gone, "document does not exist", 0);
              exit;
          }

          $source_string = file_get_contents($srpath, true);
          if ($source_string === false) {
              // The read failed even though the file looked readable; without this
              // check strlen(false) is 0 and the failure would be reported as
              // "document does not need shrinking".
              shrinkray_response($shrinkray_status_internal_error, "could not read document", 1);
              exit;
          }

          if (strlen($source_string) <= $shrinkray_max_size) {
              shrinkray_response($shrinkray_status_precondition_failed, "document does not need shrinking", 0);
              break;
          }

          // Each shrinkray_eval() call below prints the candidate and exits as soon
          // as one is small enough, so reaching the next step means the previous
          // one was still too large.

          // step - strip HTML
          $working_string = strip_tags($source_string);
          shrinkray_eval($working_string, $shrinkray_status_shrunk_tags);

          // step - remove duplicate tokens
          shrinkray_eval(shrinkray_dedup($working_string), $shrinkray_status_shrunk_dedup);

          // step - strip punctuation, then collapse runs of whitespace
          // reference: http://us1.php.net/manual/en/regexp.reference.unicode.php
          //   \p{P}    punctuation class
          //   \p{S}    symbol class
          //   \p{Zp}   paragraph separator class
          //   \p{N}    number class (used by the next step)
          // was: preg_replace("/[^a-zA-Z0-9\s]+/", " ", $working_string)
          // NOTE(review): these patterns run without the /u modifier, so they match
          // byte by byte. If documents are UTF-8 this probably wants /u plus
          // handling of preg_replace() returning null on invalid input - confirm
          // the expected input encoding first.
          $working_string = preg_replace("/[\p{P}\p{S}\p{Zp}]/", " ", $working_string);
          $working_string = preg_replace("/\s\s+/", " ", $working_string);
          shrinkray_eval(shrinkray_dedup($working_string), $shrinkray_status_shrunk_punctuation);

          // step - strip numbers
          $working_string = preg_replace("/\p{N}/", " ", $working_string);
          $working_string = preg_replace("/\s\s+/", " ", $working_string);
          shrinkray_eval(shrinkray_dedup($working_string), $shrinkray_status_shrunk_numbers);

          // step - lower case, so tokens differing only in case deduplicate
          shrinkray_eval(shrinkray_dedup(strtolower($working_string)), $shrinkray_status_shrunk_lowercase);

          // step - head: go back to the source string, strip HTML tags again and
          // keep only the leading bytes, leaving 100 bytes of headroom
          $working_string = substr(strip_tags($source_string), 0, ($shrinkray_max_size - 100));
          $working_string = preg_replace("/\s\s+/", " ", $working_string);
          shrinkray_eval($working_string.'...', $shrinkray_status_shrunk_header);

          // Ugh, it didn't work and really there's not a lot of ways to get to this step
          shrinkray_response($shrinkray_status_too_large, "tried all the tricks, still too large", 1);
          break;

      default:
          // unknown or missing action: send the client back to the landing page
          shrinkray_response($shrinkray_status_redirect, "bye bye", 0);
          break;
  }
  133. ?>
RAW Paste Data