Advertisement
Bodigrim

Formatting texts for cat|less

Oct 7th, 2011
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.73 KB | None | 0 0
  1. <?php
  2.  
  // Files: text is read from $input and the formatted result is written to $output.
  $input  = 'book.in';
  $output = 'book.out';

  // Page geometry, in characters.
  $chars_per_line = 60;   // full width of an output line, margins included
  $margin_left    = 0;    // spaces put in front of every output line
  $margin_right   = 0;    // columns kept free at the right edge
  $red_line       = 5;    // extra indent of the first output line of each input line

  // Placeholder that stands for a non-breaking space while a line is being wrapped.
  define('NBSP', '~');
  11.  
  /**
   * Tell whether a character is one of the punctuation marks this script
   * treats specially: , . - — ; : ! ? )
   *
   * The comparison is strict, so only a string holding exactly one of those
   * marks matches; non-string values never do.
   *
   * @param mixed $char  a single character (normally the result of mb_substr)
   * @return bool        true when $char is one of the listed marks
   */
  function is_punct($char){
      return in_array($char, array(",", ".", "-", "—", ";", ":", "!", "?", ")"), true);
  }
  15.    
  /**
   * Tell whether a character is ASCII whitespace: space, tab, carriage
   * return or line feed.
   *
   * The comparison is strict, so only a string holding exactly one of those
   * characters matches; non-string values never do.
   *
   * @param mixed $char  a single character (normally the result of mb_substr)
   * @return bool        true when $char is one of the four whitespace characters
   */
  function is_space($char){
      return in_array($char, array(" ", "\t", "\r", "\n"), true);
  }
  19.    
  /**
   * Build a run of spaces.
   *
   * @param int|float $n  how many spaces; values below zero are treated as zero
   * @return string       $n space characters, or "" when $n is not positive
   */
  function sp($n){
      // str_repeat() rejects a negative count (ValueError in PHP 8), so clamp it.
      return str_repeat(" ", max(0, (int)$n));
  }
  23.    
  /**
   * Cut the leading piece of a line that fits into $len characters.
   *
   * The cut is made at the last whitespace character (as defined by
   * is_space) that still leaves at most $len characters in front of it, and
   * the piece is returned trimmed. The character right after the limit is
   * inspected too, so a limit that falls exactly on a word boundary keeps the
   * whole last word. When there is no whitespace to break at, the line is cut
   * hard after $len characters instead of being reduced to a single character.
   *
   * @param string $line  text to cut; callers are expected to pass it trimmed
   * @param int    $len   maximum length of the result in characters;
   *                      values below 1 are treated as 1 so the result of
   *                      cutting a non-empty line is never empty
   * @return string       $line itself when it already fits, otherwise its
   *                      leading piece of at most $len characters
   */
  function crop($line,$len){
      if($len<1)
          $len=1;

      if(mb_strlen($line)<=$len)
          return $line;

      // One character more than the limit: whitespace at index $len means the
      // first $len characters end exactly on a word boundary.
      $head=mb_substr($line,0,$len+1);

      for($i=$len;$i>=1;$i--){
          if(is_space(mb_substr($head,$i,1))){
              $piece=trim(mb_substr($head,0,$i));
              if($piece!=="")
                  return $piece;
          }
      }

      // No usable break point: split the over-long word at the limit.
      return mb_substr($line,0,$len);
  }
  37.    
  /**
   * Stretch a line to exactly $len characters by widening the gaps between
   * its words.
   *
   * Every gap gets one space plus a share of the spare room. The share is
   * proportional to a weight computed from the two neighbouring words: the
   * shorter of the two lengths, or, when the left word ends with a
   * punctuation mark (see is_punct), the larger of "left length minus one"
   * and the right length. A line made of a single word is pushed to the
   * right edge.
   *
   * A line that already needs $len characters or more is returned with
   * single spaces between the words instead of failing on a negative amount
   * of padding.
   *
   * @param string $line  text to justify; surrounding whitespace is ignored
   * @param int    $len   target width in characters
   * @return string       the justified line, "" for blank input
   */
  function justify($line,$len){
      $parts=mb_split("\s+",trim($line));
      if($parts===false)
          return $line;   // the splitter refused the input: leave the text as it is

      // Drop empty pieces so that every word has a positive length
      // and therefore every gap a positive weight.
      $words=array();
      foreach($parts as $part){
          if($part!=="")
              $words[]=$part;
      }

      $last=count($words)-1;
      if($last<0)
          return "";

      $total=0;          // sum of the weights of the gaps not yet emitted
      $deficite=$len;    // spare room: width left after words and single spaces
      $factors=array();
      for($i=0;$i<$last;$i++){
          $l1=mb_strlen($words[$i]);
          $l2=mb_strlen($words[$i+1]);

          $factor = is_punct(mb_substr($words[$i],-1,1)) ? max($l1-1,$l2) : min($l1,$l2);

          $total+=$factor;
          $deficite-=$l1+1;
          $factors[$i]=$factor;
      }
      $deficite-=mb_strlen($words[$last]);

      // Nothing to distribute (or the text is wider than $len).
      if($deficite<=0 || ($last>0 && $total<=0))
          return implode(" ",$words);

      $ret="";
      for($i=0;$i<$last;$i++){
          // Share of what is still left, relative to the weights still left,
          // so rounding errors never accumulate and the last gap takes the rest.
          $n=(int)round($deficite*$factors[$i]/$total);
          $ret.=$words[$i].sp($n+1);
          $deficite-=$n;
          $total-=$factors[$i];
      }

      // With several words $deficite is 0 here; with one word it is the
      // whole spare room, which right-aligns that word.
      return $ret.sp($deficite).$words[$last];
  }
  71.    
  72.    
  /**
   * Tie short words to the word that follows them.
   *
   * Wherever a single Cyrillic letter, or one of "Не", "Ни", "не", "ни",
   * stands between whitespace (or an NBSP placeholder) on its left and
   * whitespace on its right, the whitespace on the right is replaced by the
   * NBSP placeholder.
   *
   * @param string $line  text to mark up
   * @return string       the text with NBSP placeholders inserted
   */
  function preprocess($line){
      $pattern="(\s|".NBSP.")([а-яёА-ЯЁ]|Не|Ni_PLACEHOLDER|не|ни)\s+";
      $pattern="(\s|".NBSP.")([а-яёА-ЯЁ]|Не|Ни|не|ни)\s+";
      $replacement="\\1\\2".NBSP;

      // A match swallows the whitespace after the short word, so a short word
      // that comes right after another one is only reached by a later pass.
      for($pass=0;$pass<3;$pass++){
          $line=mb_ereg_replace($pattern,$replacement,$line);
      }

      return $line;
  }
  79.    
  /**
   * Turn every NBSP placeholder back into an ordinary space.
   *
   * The placeholder is replaced literally, so it is never interpreted as a
   * regular expression and the result is a string even when the text holds
   * bytes that are not valid in the current encoding.
   *
   * @param string $line  text that may contain NBSP placeholders
   * @return string       the same text with each placeholder replaced by " "
   */
  function postprocess($line){
      return str_replace(NBSP," ",$line);
  }
  84.    
  /**
   * Write one output line taken from the front of $line and hand back
   * what is left.
   *
   * The text is trimmed and marked up by preprocess(), the leading piece
   * that fits into $len characters is cut off by crop(), and its NBSP
   * placeholders are turned back into spaces by postprocess(). When text
   * remains after the piece, the piece is justified; a piece ending with a
   * punctuation mark (see is_punct) is justified to one character more than
   * $len. The piece is written to the global $outfile, prefixed with
   * $spaces and followed by a newline.
   *
   * @param string $line    text still to be written
   * @param int    $len     width available for the text of the output line
   * @param string $spaces  prefix written in front of the output line
   * @return string         the text that did not fit, "" when all was written
   */
  function process($line,$len,$spaces){
      global $outfile;

      $line = preprocess(trim($line));
      $subline = (string)postprocess(crop($line,$len));
      $taken = mb_strlen($subline);

      if($taken===0 && $line!==""){
          // Nothing could be cut off: emit the rest as it is, otherwise the
          // caller would be handed the same text again and again.
          $subline = str_replace(NBSP," ",$line);
          $taken = mb_strlen($line);
      }

      $line = (string)mb_substr($line,$taken);

      // Compare with "" explicitly: a remainder of "0" is text, not "nothing left".
      if($line!==""){
          if(is_punct(mb_substr($subline,-1,1)))
              $len++;
          $subline = justify($subline,$len);
      }

      fwrite($outfile,$spaces.$subline."\n");
      return $line;
  }
  100.  
  // Width left for text once both margins are taken off; the first output
  // line of every input line is narrower by the first-line indent.
  $len     = $chars_per_line-$margin_right-$margin_left;
  $len1    = $len-$red_line;
  $spaces  = sp($margin_left);
  $spaces1 = sp($margin_left+$red_line);

  if(min($len,$len1)<1){
      file_put_contents("php://stderr","Line width is too small for the configured margins and indent\n");
      exit(1);
  }

  // The mb_* string functions and the mb_ereg*/mb_split functions keep
  // separate encoding settings; set both.
  mb_internal_encoding("UTF-8");
  mb_regex_encoding("UTF-8");

  $infile = fopen($input,"r");
  if($infile===false){
      file_put_contents("php://stderr","Cannot open input file ".$input."\n");
      exit(1);
  }

  $outfile = fopen($output,"w");
  if($outfile===false){
      fclose($infile);
      file_put_contents("php://stderr","Cannot open output file ".$output."\n");
      exit(1);
  }

  // Every input line becomes one or more output lines: the first one
  // indented, the following ones at the left margin.
  while(($line=fgets($infile))!==false){
      $line=process($line,$len1,$spaces1);
      // Compare with "" explicitly so that a remainder of "0" is still written.
      while((string)$line!==""){
          $line=process($line,$len,$spaces);
      }
  }

  fclose($infile);
  fclose($outfile);
  122.  
  123. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement