Advertisement
dgobrien

dafont.com scraper

Jan 9th, 2015
390
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 3.57 KB | None | 0 0
  1. <?php
  // dafont.com scraper.
  //
  // Walks the alphabetical font listings (letters a-z plus '#'), one listing
  // page at a time, and hands each page to getTextBetweenTags(), which performs
  // the actual downloads. The globals $maxpages and $counter are shared with the
  // functions below through their `global` declarations, so they must stay
  // ordinary top-level variables.
  $lettres = str_split('abcdefghijklmnopqrstuvwxyz#');
  $maxpages = 1;
  $counter = 1;
  $output = array();
  echo "\n\n";
  foreach ( $lettres as $letter ) {
      // The page count is specific to each letter: reset it, otherwise the
      // largest count seen so far would be reused for letters with fewer pages.
      $maxpages = 1;
      $baseurl = "http://www.dafont.com/alpha.php?lettre=" . urlencode( $letter );

      // GET PAGES FOR EACH LETTER
      // The HTML of page 1 (not its URL) has to be scanned: in strict mode
      // getTextBetweenTags() only inspects the pagination links and raises
      // $maxpages to the highest page number it finds.
      $firstpage = file_get_contents( $baseurl . "&page=1&fpp=100" );
      if ( $firstpage === false ) {
          echo "Letter: " . $letter . " could not be fetched, skipping\n";
          continue;
      }
      getTextBetweenTags( 'a' , $firstpage , 1 );

      for ( $pagenum = 1; $pagenum <= $maxpages; $pagenum++ ) {
          echo "Letter: " . $letter . ' page: ' . $pagenum . "\n";
          if ( $pagenum == 1 ) {
              // Page 1 is already in memory; do not request it a second time.
              $array = $firstpage;
          } else {
              $url = $baseurl . "&page=" . $pagenum . "&fpp=100";
              $array = file_get_contents( $url );
              if ( $array === false ) {
                  echo "\tCould not fetch " . $url . ", skipping page\n";
                  continue;
              }
          }
          // Non-strict mode: download every font linked from this page.
          $output[] = getTextBetweenTags( 'a' , $array , 0 , $letter );
      }
  }
  27.  
  /**
   * Scan all <$tag> elements of a listing page and act on their links.
   *
   * With $strict == 1 nothing is downloaded: only links whose href starts with
   * "alpha.php?" are inspected, and the global $maxpages is raised to the
   * highest "page=" number found.
   *
   * With $strict == 0 the function remembers the most recent theme links
   * ("mtheme.php..." = master theme, "theme.php?..." = sub theme) and, for each
   * link with class "dl", downloads the archive via get_file1() into
   * <cwd>/dafonts/fonts_<letter>/ and symlinks it from
   * <cwd>/dafonts/<masterTheme>/<subTheme>/. The global $counter is incremented
   * once per download link processed.
   *
   * @param string $tag    Tag name to iterate over (callers pass 'a').
   * @param string $html   HTML source of the page; empty or non-string input yields array().
   * @param int    $strict 1 = only update $maxpages, 0 = download fonts.
   * @param string $letter Listing letter, used to name the download directory.
   * @return array List of array("class" => ..., "href" => ...) for every download link seen.
   */
  function getTextBetweenTags( $tag, $html, $strict = 0 , $letter = "" ) {
      global $maxpages, $counter;

      $out = array();

      // A failed fetch hands us false, and DOMDocument::loadHTML() refuses empty
      // input (it throws on PHP 8), so bail out before touching the parser.
      if ( !is_string( $html ) || trim( $html ) === '' ) {
          return $out;
      }

      // Collect libxml parse warnings internally instead of silencing with "@".
      $previousErrorMode = libxml_use_internal_errors( true );
      $dom = new DOMDocument();
      $loaded = $dom->loadHTML( $html );
      libxml_clear_errors();
      libxml_use_internal_errors( $previousErrorMode );
      if ( $loaded === false ) {
          return $out;
      }

      // Theme state must exist before the first download link is reached.
      $masterTheme = "";
      $subTheme = "";
      $base = getcwd();

      foreach ( $dom->getElementsByTagName( $tag ) as $item ) {
          $href = $item->getAttribute('href');
          $class = $item->getAttribute('class');

          if ( $strict == 0 ) {
              // Text of the link's first child node, or "" for an empty element.
              $label = $item->firstChild ? (string) $item->firstChild->nodeValue : "";
              if ( strncmp( $href , 'mtheme.php' , 10 ) === 0 ) {
                  $masterTheme = $label;
                  $subTheme = "";
              }
              if ( strncmp( $href , 'theme.php?' , 10 ) === 0 ) {
                  $subTheme = $label;
              }
          }

          if ( $class == 'dl' && $strict == 0 ) {
              $out[] = array( "class" => $class , "href" => $href );

              // Take the value of the "f" query parameter only; splitting on the
              // bare text "f=" would also split inside names such as "ref=".
              $filename = "";
              if ( preg_match( '/[?&]f=([^&#]*)/' , $href , $match ) ) {
                  $filename = dafontPathSegment( $match[1] , "" );
              }
              if ( $filename === "" ) {
                  echo "\tSkipping download link without a usable f= value: " . $href . "\n";
                  continue;
              }

              $dir = $base . "/dafonts/fonts_" . $letter . "/";
              $structure = $base . "/dafonts/"
                  . dafontPathSegment( $masterTheme , "Misc" ) . "/"
                  . dafontPathSegment( $subTheme , "Misc" ) . "/";

              // Best effort: report a directory that cannot be created, keep going.
              foreach ( array( $structure , $dir ) as $path ) {
                  if ( !is_dir( $path ) && !mkdir( $path , 0777 , true ) && !is_dir( $path ) ) {
                      echo "\tCould not create directory: " . $path . "\n";
                  }
              }

              $archive = $dir . $filename . '.zip';
              $link = $structure . $filename . ".zip";
              get_file1( $href , $dir , $filename . '.zip' , $href );

              // Only link when the archive really exists on disk and the link is
              // not already there (e.g. from an earlier run).
              if ( is_file( $archive ) && !is_link( $link ) && !file_exists( $link ) ) {
                  echo "\tCreating SymLink: " . $link . " for Source: " . $archive . "\n";
                  if ( !symlink( $archive , $link ) ) {
                      echo "\tSymLink could not be created: " . $link . "\n";
                  }
              }

              $counter = $counter + 1;
              // Each font carries its own theme links; do not leak them to the next one.
              $masterTheme = "";
              $subTheme = "";
          } elseif ( $strict == 1 && strncmp( $href , 'alpha.php?' , 10 ) === 0 ) {
              // Pagination link: remember the highest page number as an integer.
              if ( preg_match( '/page=(\d+)/' , $href , $match ) ) {
                  $page = (int) $match[1];
                  if ( $page > $maxpages ) {
                      $maxpages = $page;
                  }
              }
          }
      }
      return $out;
  }

  /**
   * Make remote-supplied text safe to use as a single path component.
   *
   * Directory separators and NUL bytes are replaced with "_"; an empty result
   * or a "." / ".." component is replaced by $fallback.
   *
   * @param string $name     Raw text taken from the scraped page.
   * @param string $fallback Value to use when nothing usable remains.
   * @return string
   */
  function dafontPathSegment( $name , $fallback ) {
      $clean = str_replace( array( '/' , '\\' , "\0" ) , '_' , (string) $name );
      if ( $clean === '' || $clean === '.' || $clean === '..' ) {
          return $fallback;
      }
      return $clean;
  }
  69.  
  /**
   * Download $file with cURL and store it as $local_path . $newfilename.
   *
   * Progress and errors are echoed; the global $counter is only read for the
   * progress line. If the destination file cannot be opened the whole script is
   * terminated, as before, but now with a non-zero exit status.
   *
   * @param string $file        URL to download.
   * @param string $local_path  Destination directory, including the trailing slash.
   * @param string $newfilename File name to create inside $local_path.
   * @param string $referer     Value sent in the Referer request header.
   * @return bool True when the transfer succeeded; false when it failed (the
   *              partial file is removed in that case).
   */
  function get_file1($file, $local_path, $newfilename , $referer) {
      global $counter;

      $target = $local_path . $newfilename;
      echo $counter . ":\tAttempting download for $file\n";

      $out = fopen( $target , "wb" );
      if ( $out === false ) {
          print "File not opened \n";
          exit(1);
      }

      $ch = curl_init();
      if ( $ch === false ) {
          echo " Error is : could not initialise cURL\n";
          fclose( $out );
          if ( is_file( $target ) ) {
              unlink( $target );
          }
          return false;
      }

      curl_setopt_array( $ch , array(
          CURLOPT_FILE       => $out,
          CURLOPT_HEADER     => 0,
          CURLOPT_URL        => $file,
          CURLOPT_USERAGENT  => "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)",
          CURLOPT_HTTPHEADER => array( 'Referer: ' . $referer ),
          // Treat HTTP status >= 400 as a failure instead of saving the
          // server's error page under a .zip name.
          CURLOPT_FAILONERROR => true,
      ) );

      $ok = curl_exec( $ch );
      $error = curl_error( $ch );
      curl_close( $ch );
      // Close explicitly so the data is flushed before the caller uses the file.
      fclose( $out );

      if ( $ok === false || $error !== '' ) {
          echo " Error is : " . $error . "\n";
          // Do not leave an empty or truncated archive behind.
          if ( is_file( $target ) ) {
              unlink( $target );
          }
          return false;
      }
      return true;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement