Advertisement
Guest User

scrub_divinum

a guest
Jul 29th, 2011
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.48 KB | None | 0 0
  1. #! /usr/bin/env php
  2. <?php
  3.  
  // Drop the script name so $argv holds only the user-supplied targets.
  array_shift( $argv );

  if( $argc < 2 )
  {
      // No targets given: default to every M*.html file next to this script.
      // glob() returns false on error; normalise that to an empty list so the
      // loop below never iterates over a non-array.
      $argv = glob( __DIR__ . "/M*.html" );
      if( $argv === false )
      {
          $argv = array();
      }
  }

  // Resolve each argument to a date key via f(), clean it with run(), and
  // remember it for the index. A key that was already handled is skipped so
  // the same date is not processed or listed twice.
  $links = array();
  foreach( $argv as $file )
  {
      $f = f( $file );
      if( $f === false || in_array( $f, $links, true ) ) continue;
      $links[] = $f;
      run( $f );
  }

  // Build the index page: the title lists every processed date key and the
  // body holds one link per generated Horas-<key>.html file.
  $index = "<html><head><title>Liturgy for "
      . implode( ', ', $links ) . '</title></head><body>';
  foreach( $links as $d )
  {
      $index .= "<div><a href=\"Horas-{$d}.html\">Horas-{$d}.html</a></div>";
  }
  $index .= '</body></html>';

  // NOTE(review): this path is relative to the current working directory,
  // while the links above are relative file names — confirm the script is
  // always run from the directory holding the Horas-* files.
  file_put_contents( 'index.html', $index );
  29.  
  /**
   * Extract the date key ("<1-2 digits>-<1-2 digits>") from a target argument.
   *
   * Only the final path component is inspected, so digits that happen to
   * appear in a directory name (for example ".../2011-07-29/Master-8-15.html")
   * cannot shadow the key carried by the file name itself.
   *
   * @param string $arg A bare key such as "7-29" or a path to a file whose
   *                    name contains one.
   * @return string|false The first matching key, or false when none is found.
   */
  function f( $arg )
  {
      if( preg_match( "#([0-9]{1,2}-[0-9]{1,2})#", basename( $arg ), $matches ) )
      {
          return $matches[ 0 ];
      }
      return false;
  }
  36.  
  37.  
  /**
   * Clean every file that belongs to one date key.
   *
   * Each regular file found in <script dir>/<targ>/ is passed to process()
   * and written under the same base name into <script dir>/<targ>_clean/.
   * Afterwards Master-<targ>.html is processed into Horas-<targ>.html, both
   * located next to this script.
   *
   * @param string $targ Date key as produced by f().
   */
  function run( $targ  )
  {
      $d = __DIR__ . DIRECTORY_SEPARATOR;
      $cleanDir = "{$d}{$targ}_clean";

      // Only create the output directory when it is missing, so a genuine
      // mkdir() failure is reported instead of being silenced.
      if( !is_dir( $cleanDir ) )
      {
          mkdir( $cleanDir );
      }

      // glob() returns false on error; treat that as "nothing to process".
      $dir = glob( $d . $targ . DIRECTORY_SEPARATOR . '*' );
      if( $dir === false )
      {
          $dir = array();
      }

      foreach( $dir as $fl )
      {
          // Sub-directories cannot be read as HTML documents; skip them.
          if( !is_file( $fl ) ) continue;

          process( $fl, $cleanDir . DIRECTORY_SEPARATOR . basename( $fl ), $targ );
      }

      // "{$var}" is used instead of "${var}", which is deprecated in PHP 8.2.
      process( "{$d}Master-{$targ}.html", "{$d}Horas-{$targ}.html", $targ );
  }
  53.  
  /**
   * Clean a single HTML file and write the result to a new location.
   *
   * The input is parsed with DOMDocument::loadHTML(), its font/td/tr/tbody/
   * table elements are unwrapped through remove(), its link targets are
   * rewritten through clean_references(), and the document is serialised with
   * saveXML() into $out.
   *
   * When the input cannot be read or is empty, nothing is written: on PHP 8
   * loadHTML() throws a ValueError for an empty source, and the error
   * suppression operator cannot intercept that.
   *
   * @param string $fl   Path of the HTML file to read.
   * @param string $out  Path the cleaned document is written to.
   * @param string $targ Date key forwarded to clean_references().
   */
  function process( $fl, $out, $targ )
  {
      // A failed read already raises PHP's own warning; just stop here.
      $cont = file_get_contents( $fl );
      if( $cont === false )
      {
          return;
      }
      if( $cont === '' )
      {
          trigger_error( "Skipping empty input file {$fl}", E_USER_WARNING );
          return;
      }

      // Collect libxml parse diagnostics internally instead of using "@",
      // then restore whatever error mode was active before.
      $previous = libxml_use_internal_errors( true );
      $dom = new DOMDocument();
      $dom->loadHTML( $cont );
      libxml_clear_errors();
      libxml_use_internal_errors( $previous );

      remove( $dom, "font", "td", "tr", "tbody", "table" );
      clean_references( $dom, $targ );

      file_put_contents( $out, $dom->saveXML() );
  }
  64.  
  /**
   * Unwrap every element whose tag name is listed after $dom.
   *
   * "Unwrap" means the element's children are moved up to take its place
   * (via graduate()) and the now-empty element is removed from its parent.
   * Tag names are read from the extra call arguments, e.g.
   * remove( $dom, "font", "table" ).
   *
   * @param DOMDocument $dom Document to modify in place.
   */
  function remove( $dom )
  {
      $tagNames = array_slice( func_get_args(), 1 );

      foreach( $tagNames as $tagName )
      {
          // Re-query after each change and always take the first remaining
          // match; the loop ends once no element of this name is left.
          while( ( $element = $dom->getElementsByTagName( $tagName )->item( 0 ) ) !== null )
          {
              $owner = $element->parentNode;

              // Snapshot the children first: moving nodes while walking the
              // element's own child list would disturb the iteration.
              $children = array();
              foreach( $element->childNodes as $childNode )
              {
                  $children[] = $childNode;
              }

              graduate( $children, $element, $owner );
              $owner->removeChild( $element );
          }
      }
  }
  87.  
  /**
   * Promote a list of nodes one level up in the tree.
   *
   * Every node in $list is inserted into $grandparent immediately before
   * $parent, in list order, so the nodes end up as preceding siblings of
   * $parent.
   *
   * @param array   $list        Nodes to move.
   * @param DOMNode $parent      Reference node the moved nodes are placed before.
   * @param DOMNode $grandparent Node that receives the moved nodes.
   */
  function graduate( $list, $parent, $grandparent )
  {
      foreach( $list as $promoted )
      {
          $grandparent->insertBefore( $promoted, $parent );
      }
  }
  93.  
  /**
   * Point every link in the document at the cleaned copy of its target.
   *
   * For each <a> element that has an href attribute, every occurrence of
   * $targ inside the href is replaced by "<targ>_clean". Anchors without an
   * href (for example <a name="...">) are left untouched, so they do not
   * gain an empty href and turn into links.
   *
   * @param DOMDocument $data Document to modify in place.
   * @param string      $targ Date key to look for inside link targets.
   */
  function clean_references( $data, $targ )
  {
      foreach( $data->getElementsByTagName( "a" ) as $a )
      {
          if( !$a->hasAttribute( "href" ) ) continue;

          $href = str_replace( $targ, "{$targ}_clean", $a->getAttribute( "href" ) );
          $a->setAttribute( "href", $href );
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement