Guest User

simple HTML strip toy

a guest
Oct 31st, 2012
189
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2.  
  3.  
  /**
   * Toy finite-state machine that strips tags from an HTML fragment.
   *
   * The input is consumed left to right through four states, each implemented
   * by the private method of the same name:
   *
   *  - text      : characters are copied to the output until a '<' is met;
   *  - start_tag : the tag name is discarded;
   *  - attr_list : attribute names are discarded;
   *  - attr_val  : a double-quoted attribute value is discarded, including any
   *                '>' or backslash-escaped '"' it contains.
   *
   * A tag closed by '>' is replaced by a single '_' in the output; a tag closed
   * by '/>' is removed without leaving a placeholder.
   *
   * The machine works on bytes (strlen/substr). Every delimiter it looks for is
   * ASCII, so bytes of the text between tags are copied through unchanged.
   */
  class StripHtmlFsm {

      /** @var string Current state; also the name of the handler method to call. */
      private $state = 'text';

      /** @var string Complete input; never modified after construction. */
      private $HTML;

      /** @var int Length of the input in bytes. */
      private $length;

      /** @var int Offset of the next unread byte of the input. */
      private $pos = 0;

      /** @var string Text accumulated so far. */
      private $output = '';

      /** @var bool Whether the step-by-step trace is echoed. */
      private $debug;

      /**
       * @param string $HTML  Fragment to strip.
       * @param bool   $debug When true, every lookup, move, delete and
       *                      transition is echoed; silent by default so the
       *                      trace does not pollute the caller's output.
       */
      public function __construct($HTML, $debug = false) {
          $this->HTML = (string) $HTML;
          $this->length = strlen($this->HTML);
          $this->debug = (bool) $debug;
      }

      /**
       * Runs the machine until the whole input has been consumed.
       *
       * Calling it again once the input is exhausted does nothing.
       */
      public function parse_transform() {
          while ($this->pos < $this->length) {
              // The state name is the name of the method handling that state.
              $this->{$this->state}();
          }
      }

      /**
       * @return string The text accumulated so far.
       */
      public function output() {
          return $this->output;
      }

      ## -----------------------------------------------
      ## parse/buffer tools
      ## -----------------------------------------------

      /** Echoes a trace message when debugging is enabled. */
      private function trace($message) {
          if ($this->debug) {
              echo $message;
          }
      }

      /**
       * Returns up to $amount unread bytes without consuming them.
       *
       * The cast covers PHP versions before 8.0, where substr() returns false
       * instead of '' when the offset equals the string length.
       */
      private function peek($amount) {
          return (string) substr($this->HTML, $this->pos, $amount);
      }

      /** Tells whether the unread input starts with $start_chars. */
      private function lookup($start_chars) {
          $extr = $this->peek(strlen($start_chars));
          $this->trace("lookup '$extr'\n");
          return $extr === $start_chars;
      }

      /** Appends $str to the output. */
      private function bufferize($str) {
          $this->output .= $str;
      }

      /** Moves the cursor forward by $amount bytes, never past the end of the input. */
      private function advance($amount) {
          $this->pos = min($this->length, $this->pos + $amount);
          if ($this->pos >= $this->length) {
              $this->trace("HTML is empty\n ");
          }
      }

      /** Copies the next $amount bytes to the output and consumes them. */
      private function move($amount) {
          $chunk = $this->peek($amount);
          $this->trace("moving '$chunk'\n");
          $this->bufferize($chunk);
          $this->advance($amount);
      }

      /** Consumes the next $amount bytes without copying them. */
      private function delete($amount) {
          $this->trace("delete '" . $this->peek($amount) . "'\n");
          $this->advance($amount);
      }

      ## -----------------------------------------------
      ## FSM states
      ## -----------------------------------------------

      /**
       * Runs the transition method "<current>_to_<new>" and then switches state.
       *
       * @throws LogicException When no such transition method is defined.
       */
      private function change_state($state) {
          $transition = "{$this->state}_to_$state";
          if (!method_exists($this, $transition)) {
              throw new LogicException("Undefined FSM transition $transition");
          }
          $this->trace("Transition $transition\n");
          $this->{$transition}();
          $this->trace("New state $state\n");
          $this->state = $state;
      }

      /** Outside any tag: copy one byte, or enter a tag on '<'. */
      private function text() {
          if ($this->lookup('<')) {
              $this->change_state('start_tag');
              return;
          }
          $this->move(1);
      }

      /** Inside the tag name: discard bytes until a space or the end of the tag. */
      private function start_tag() {
          if ($this->lookup(' ')) {
              $this->change_state('attr_list');
          }
          elseif ($this->lookup('/>') || $this->lookup('>')) {
              // The transition itself consumes the closing delimiter.
              $this->change_state('text');
          }
          else {
              $this->delete(1);
          }
      }

      /** Between attributes: discard bytes until '="' opens a value or the tag ends. */
      private function attr_list() {
          if ($this->lookup('="')) {
              $this->delete(2);
              $this->change_state('attr_val');
          }
          elseif ($this->lookup('/>') || $this->lookup('>')) {
              $this->change_state('text');
          }
          else {
              $this->delete(1);
          }
      }

      /** Inside a double-quoted value: discard bytes up to the closing quote. */
      private function attr_val() {
          if ($this->lookup('\"')) {
              // Backslash-escaped quote: part of the value, not its end.
              $this->delete(2);
          }
          elseif ($this->lookup('"')) {
              $this->delete(1);
              $this->change_state('attr_list');
          }
          else {
              $this->delete(1);
          }
      }

      ## -----------------------------------------------
      ## FSM transitions
      ## -----------------------------------------------

      /** Consumes the opening '<'. */
      private function text_to_start_tag() {
          $this->delete(1);
      }

      /** Nothing to do: attr_list() discards the separating space itself. */
      private function start_tag_to_attr_list() {
      }

      /** Nothing to do: attr_list() already consumed the '="'. */
      private function attr_list_to_attr_val() {
      }

      /** Nothing to do: attr_val() already consumed the closing quote. */
      private function attr_val_to_attr_list() {
      }

      /** Leaving a tag from the attribute list ends it like start_tag_to_text(). */
      private function attr_list_to_text() {
          $this->start_tag_to_text();
      }

      /**
       * Consumes the end of the tag: '/>' disappears silently, while '>' leaves
       * a '_' placeholder in the output.
       */
      private function start_tag_to_text() {
          if ($this->lookup('/>')) {
              $this->delete(2);
          }
          else {
              $this->delete(1);
              $this->bufferize('_');
          }
      }

  }
  // Demo driver: strips a sample fragment and prints the result as plain text.

  // Abort the script after 3 seconds.
  // NOTE(review): presumably a safety net against a non-terminating parse - confirm.
  set_time_limit(3);
  // "utf-8" is the registered charset name; "utf8" is not.
  header('content-type: text/plain; charset=utf-8');
  ini_set('html_errors','off');
  // The sample contains an escaped quote and a '>' inside an attribute value.
  $HTML = 'hallo mon nom est <strong class="qzd\">mou">Jeorg</strong>,<br/> et toi ?';
  $p = new StripHtmlFsm($HTML);
  $p->parse_transform();
  // Dump the parser's internal state for inspection, then the stripped text.
  var_dump($p);
  echo $p->output();
RAW Paste Data