Guest User

nb-dl/download.php

a guest
Jul 6th, 2018
147
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 8.43 KB | None | 0 0
  1. <!DOCTYPE html>
  2. <html lang="no-NO">
  3. <head>
  4. <meta charset="utf-8">
  5. </head>
  6.  
  7. <h1><img src="nblogo.png"> Last ned bøker fra nasjonalbiblioteket</h1>
  8. Dette scriptet laster ned bøker gitt ei liste med bok-IDer. Boka fra <a href="https://nb.no">nasjonalbiblioteket</a> lastes ned i form av jpg-bilder. Deretter kan du lage en pdf-fil av bildene ved hjelp av dir2pdf.php. Finn bok-IDer i nb_liste.txt eller i nb_liste-html-filene hvor du kan trykke "Last ned" for å legge bok-IDen til lista i fila "bookids.txt". Disse bøkene lastes da ned i egen bokID-mappe i tmpbooks/. Disse mappene prosesseres til pdf-filer til mappa books/ ved hjelp av dir2pdf.php.
  9. <p>
  10.  
  11. <?php
  12. ini_set('memory_limit','512M');
  13. set_time_limit (0);
  14. error_reporting(E_ERROR | E_WARNING | E_PARSE);
  15.  
  16.  
  17. /*********************************************************************************************
  18.  
  19. $level - variabelen setter størrelse/kvalitet på boka:
  20. 3: God, lesbar kvalitet (en normal bokside blir ca. 100 kb, dvs. bok på 150 sider blir ca. 15 mb).
  21. 4: Bedre kvalitet (ca. 250 kb/bokside, bok ca. 37 mb).
  22. 5: Er max størrelse/kvalitet (ca. 500 kb/bokside, bok ca. 75 mb).
  23.  
  24. *********************************************************************************************/
  25. $level = 3;
  26.  
  27.  
  28.  
  29. $maxlevel = 5;
  30. $frompage = 1;
  31. $topage = 9999;
  32. $x = 1024;
  33. $y = 1024;
  34. $resx = 9999;
  35. $resy = 9999;
  36.  
  37. function mergeall($cols, $rows) {
  38.     global $bookid;
  39.     for ($row = 0; $row <= $rows; $row++) {
  40.         for ($col = 0; $col <= $cols; $col++) {
  41.             if ($col == 0) {
  42.                 rename("tmpbooks/$bookid/col$col"."row$row.jpg", "tmpbooks/$bookid/colrow$row.jpg");
  43.             } else {
  44.                 merge("tmpbooks/$bookid/colrow$row.jpg", "tmpbooks/$bookid/col$col"."row$row.jpg", "tmpbooks/$bookid/colrow$row.jpg", 'hori');
  45.             }
  46.         }
  47.         if ($row == 0) {
  48.             rename("tmpbooks/$bookid/colrow$row.jpg", "tmpbooks/$bookid/colrow.jpg");
  49.         } else {
  50.             merge("tmpbooks/$bookid/colrow.jpg", "tmpbooks/$bookid/colrow$row.jpg", "tmpbooks/$bookid/colrow.jpg", 'vert');
  51.         }
  52.     }
  53. }
  54.  
  55. function merge($filename_1, $filename_2, $filename_result, $direction) {
  56.     // Get dimensions for specified images
  57.     list($width_1, $height_1) = @getimagesize($filename_1);
  58.     list($width_2, $height_2) = @getimagesize($filename_2);
  59.    
  60.     // Load images
  61.     $image_1 = @imagecreatefromjpeg($filename_1);
  62.     $image_2 = @imagecreatefromjpeg($filename_2);
  63.  
  64.     if ($direction == 'vert') {
  65.         // Create new image with desired dimensions
  66.         $image = imagecreatetruecolor($width_1, $height_1 + $height_2);
  67.    
  68.         //Copy
  69.         imagecopy($image, $image_1, 0, 0, 0, 0, $width_1, $height_1);
  70.         imagecopy($image, $image_2, 0, $height_1, 0, 0, $width_2, $height_2);
  71.     }
  72.     elseif ($direction == 'hori') {
  73.         // Create new image with desired dimensions
  74.         $image = imagecreatetruecolor($width_1 + $width_2, $height_1);
  75.    
  76.         //Copy
  77.         @imagecopy($image, $image_1, 0, 0, 0, 0, $width_1, $height_1);
  78.         @imagecopy($image, $image_2, $width_1, 0, 0, 0, $width_2, $height_2);
  79.     }
  80.    
  81.     // Save the resulting image to disk (as JPEG)
  82.     imagejpeg($image, $filename_result);
  83.    
  84.     // Clean up
  85.     @imagedestroy($image);
  86.     @imagedestroy($image_1);
  87.     @imagedestroy($image_2);
  88. }
  89.  
  90. // Gets the data from a URL
  91. function get_data($url) {
  92.     $ch = curl_init();
  93.     $timeout = 15;
  94.     curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
  95.     curl_setopt($ch,CURLOPT_SSL_VERIFYPEER, false);
  96.     curl_setopt($ch, CURLOPT_URL, $url);
  97.     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  98.     curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
  99.     $data = curl_exec($ch);
  100.     $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
  101.     if (!strstr($content_type,"image/jpeg")) {
  102.         return NULL;
  103.     }
  104.     if ($data === false) {
  105.         echo 'Curl error: ' . curl_error($ch);
  106.     }
  107.     curl_close($ch);
  108.     return $data;
  109. }
  110.  
  111. $ids = file('bookids.txt');
  112. $hang = fopen('hang.txt',"a+"); // list of hanged-up and aborted books
  113.  
  114. foreach ($ids as $id) {
  115.     $bookid = trim($id);
  116.     if (!$bookid) continue;
  117.     if (file_exists("tmpbooks/$bookid")) continue;
  118.     @mkdir("tmpbooks/$bookid");
  119.     echo "laster ned bok $bookid...<br>"; ob_flush();
  120.     unset($covernotfound);
  121.     unset($tworows);
  122.    
  123.     // Check how many columns a page has
  124.     for ($checkcol=0; $checkcol<=8;$checkcol++) {
  125.     //echo "check $checkcol... ";
  126.         $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_0001&maxLevel=$maxlevel&level=$level&col=$checkcol&row=0&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
  127.         $file_headers = get_headers($url);
  128.         if (strstr($file_headers[0],"200")) {
  129.             $cols = $checkcol;
  130.         } else break;
  131.     }
  132.    
  133.     // Check how many rows a page has
  134.     for ($checkrow=0; $checkrow<=8;$checkrow++) {
  135.     //echo "check $checkrow... ";
  136.         $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_0001&maxLevel=$maxlevel&level=$level&col=0&row=$checkrow&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
  137.         $file_headers = get_headers($url);
  138.         if (strstr($file_headers[0],"200")) {
  139.             $rows = $checkrow;
  140.         } else break;
  141.     }
  142.    
  143.     if ($cols==0 and $rows==0) echo "sidene i boka er ikke delt.<br>";
  144.     else { $nocols = $cols+1; $norows = $rows+1;
  145.         echo "sidene i boka er delt i $nocols kolonne(r) og $norows rekke(r).<br>";
  146.     }
  147.    
  148.     // Get covers
  149.     for ($c = 1; $c <=3; $c++) {
  150.         echo "henter cover $c...<br>"; ob_flush();
  151.         for ($col = 0; $col <= $cols; $col++) {
  152.             for ($row = 0; $row <= $rows; $row++) {
  153.                 unset($data);
  154.                 unset($loop);
  155.                 $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_C".$c."&maxLevel=$maxlevel&level=$level&col=$col&row=$row&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
  156.                 $file_headers = get_headers($url);
  157.                 if(strstr($file_headers[0],"404") or strstr($file_headers[0],"403")) {
  158.                     $covernotfound++;
  159.                     continue;
  160.                 }
  161.                 $data = get_data($url);
  162.                 /*
  163.                 while (strlen($data)<1000) {
  164.                     $data = get_data($url);
  165.                 }
  166.                 */
  167.                 while (!$data) {
  168.                     if ($loop > 10) {
  169.                         //rename("tmpbooks/$bookid", "trash/$bookid"."-rnd-".rand(100,999));
  170.                         //echo "boka hang seg opp<br>"; ob_flush();
  171.                         break 3;                   
  172.                     }
  173.                     $loop++;
  174.                     get_data($url);
  175.                 }
  176.                 file_put_contents("tmpbooks/$bookid/col$col"."row$row.jpg", $data);
  177.             }
  178.         }
  179.         mergeall($cols, $rows);
  180.         rename("tmpbooks/$bookid/colrow.jpg", "tmpbooks/$bookid/cover$c.jpg");
  181.         for ($row = 0; $row <= $rows; $row++) {
  182.             for ($col = 0; $col <= $cols; $col++) {
  183.                 @unlink("tmpbooks/$bookid/col$col"."row$row.jpg");
  184.             }
  185.             @unlink("tmpbooks/$bookid/colrow$row.jpg");
  186.         }
  187.     }
  188.    
  189.     for ($row = 0; $row <= $rows; $row++) {
  190.         for ($col = 0; $col <= $cols; $col++) {
  191.             @unlink("tmpbooks/$bookid/col$col"."row$row.jpg");
  192.         }
  193.     @unlink("tmpbooks/$bookid/colrow$row.jpg");
  194.     }
  195.  
  196.     // Get pages
  197.     for ($page = $frompage; $page <= $topage; $page++) {
  198.         $curr_page = str_pad($page, 4, '0', STR_PAD_LEFT);
  199.         echo "henter side $curr_page...<br>"; ob_flush();
  200.         for ($col = 0; $col <= $cols; $col++) {
  201.             for ($row = 0; $row <= $rows; $row++) {
  202.                 unset($data);
  203.                 unset($loop);
  204.                 $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_".$curr_page."&maxLevel=$maxlevel&level=$level&col=$col&row=$row&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
  205.  
  206.                 $file_headers = get_headers($url);
  207.                 if(strstr($file_headers[0],"404") or strstr($file_headers[0],"403")) {
  208.                     if (($page == $frompage) and $covernotfound >= 2) {
  209.                         rename("tmpbooks/$bookid", "trash/$bookid"."-rnd-".rand(100,999));
  210.                         echo "fant ikke boka<br>"; ob_flush();
  211.                         break 3;
  212.                     }
  213.                     break 3;
  214.                 }
  215.                 $data = get_data($url);
  216.                 /*
  217.                 while (strlen($data)<2000) {
  218.                     $data = get_data($url);
  219.                 }
  220.                 */
  221.                 while (!$data) {
  222.                     if ($loop > 10) {
  223.                         rename("tmpbooks/$bookid", "trash/$bookid"."-rnd-".rand(100,999));
  224.                         fputs($hang, "$bookid\n");
  225.                         echo "boka hang seg opp<br>"; ob_flush();
  226.                         break 4;
  227.                         //echo "hopper over sida"; ob_flush();
  228.                         //break 2;                 
  229.                     }
  230.                     $loop++;
  231.                     get_data($url);
  232.                 }
  233.                 file_put_contents("tmpbooks/$bookid/col$col"."row$row.jpg", $data);
  234.             }
  235.         }
  236.         mergeall($cols, $rows);
  237.         rename("tmpbooks/$bookid/colrow.jpg", "tmpbooks/$bookid/p$curr_page.jpg");     
  238.     }
  239.     for ($row = 0; $row <= $rows; $row++) {
  240.         for ($col = 0; $col <= $cols; $col++) {
  241.             @unlink("tmpbooks/$bookid/col$col"."row$row.jpg");
  242.         }
  243.     @unlink("tmpbooks/$bookid/colrow$row.jpg");
  244.     }
  245.     echo "OK<p>";
  246. }
  247.  
  248. fclose($hang);
  249.  
  250. ?>
Add Comment
Please, Sign In to add comment