Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <!DOCTYPE html>
- <html lang="no-NO">
- <head>
- <meta charset="utf-8">
- </head>
- <h1><img src="nblogo.png"> Last ned bøker fra nasjonalbiblioteket</h1>
- Dette scriptet laster ned bøker gitt ei liste med bok-IDer. Boka fra <a href="https://nb.no">nasjonalbiblioteket</a> lastes ned i form av jpg-bilder. Deretter kan du lage en pdf-fil av bildene ved hjelp av dir2pdf.php. Finn bok-IDer i nb_liste.txt eller i nb_liste-html-filene hvor du kan trykke "Last ned" for å legge bok-IDen til lista i fila "bookids.txt". Disse bøkene lastes da ned i egen bokID-mappe i tmpbooks/. Disse mappene prosesseres til pdf-filer til mappa books/ ved hjelp av dir2pdf.php.
- <p>
- <?php
- ini_set('memory_limit','512M'); // raise the memory limit for this script to 512 MB
- set_time_limit (0); // 0 = no execution time limit
- error_reporting(E_ERROR | E_WARNING | E_PARSE); // report errors, warnings and parse errors only
- /*********************************************************************************************
- $level - this variable sets the size/quality of the book:
- 3: Good, readable quality (a normal book page is about 100 kB, i.e. a 150-page book is about 15 MB).
- 4: Better quality (about 250 kB per book page, book about 37 MB).
- 5: Maximum size/quality (about 500 kB per book page, book about 75 MB).
-
- Other settings, as they are used further down in this script:
- $maxlevel          - sent as the maxLevel parameter of every image request.
- $frompage, $topage - first and last page number tried by the page download loop
-                      (the loop also stops at the first page answered with 404/403).
- $x, $y             - sent as the tileWidth and tileHeight parameters of every image request.
- $resx, $resy       - sent as the resX and resY parameters of every image request.
- *********************************************************************************************/
- $level = 3;
- $maxlevel = 5;
- $frompage = 1;
- $topage = 9999;
- $x = 1024;
- $y = 1024;
- $resx = 9999;
- $resy = 9999;
/**
 * Assemble the tile files of one page into a single image file.
 *
 * Works inside "tmpbooks/<bookid>/" where <bookid> is the global $bookid.
 * For every row, tile "col<c>row<r>.jpg" with c = 0 is renamed to the row
 * strip "colrow<r>.jpg" and every further tile is appended to that strip
 * horizontally via merge(). Row strip 0 is then renamed to "colrow.jpg" and
 * every further strip is appended to it vertically via merge().
 *
 * @param int $cols Highest column index (the loop runs 0..$cols inclusive).
 * @param int $rows Highest row index (the loop runs 0..$rows inclusive).
 */
function mergeall($cols, $rows) {
    global $bookid;

    $dir  = "tmpbooks/{$bookid}";
    $page = "{$dir}/colrow.jpg";

    for ($r = 0; $r <= $rows; $r++) {
        $strip = "{$dir}/colrow{$r}.jpg";

        for ($c = 0; $c <= $cols; $c++) {
            $tile = "{$dir}/col{$c}row{$r}.jpg";
            if ($c != 0) {
                // Every later tile is glued to the right of the strip.
                merge($strip, $tile, $strip, 'hori');
                continue;
            }
            // The first tile of a row simply becomes the row strip.
            rename($tile, $strip);
        }

        if ($r != 0) {
            // Every later strip is glued below the page built so far.
            merge($page, $strip, $page, 'vert');
            continue;
        }
        // The first strip simply becomes the page image.
        rename($strip, $page);
    }
}
/**
 * Join two JPEG files into one JPEG file, side by side or stacked.
 *
 * The canvas takes its width ('vert') or height ('hori') from the first
 * image only, exactly as before; the second image is pasted to the right of
 * ('hori') or below ('vert') the first one.
 *
 * If either input cannot be read as an image, or $direction is unknown,
 * nothing is written and $filename_result is left untouched. Previously such
 * a call went on to build a canvas from empty dimensions / null image handles.
 *
 * @param string $filename_1      Path of the first (left / top) JPEG.
 * @param string $filename_2      Path of the second (right / bottom) JPEG.
 * @param string $filename_result Path the merged JPEG is written to; may be
 *                                the same as one of the inputs.
 * @param string $direction       'hori' to join left-to-right, 'vert' to join
 *                                top-to-bottom.
 * @return bool TRUE when the merged image was written, FALSE otherwise.
 */
function merge($filename_1, $filename_2, $filename_result, $direction) {
    if ($direction !== 'vert' && $direction !== 'hori') {
        return false;
    }

    // Get dimensions for specified images; bail out before touching GD if
    // either file is missing or not an image.
    $size_1 = @getimagesize($filename_1);
    $size_2 = @getimagesize($filename_2);
    if ($size_1 === false || $size_2 === false) {
        return false;
    }
    list($width_1, $height_1) = $size_1;
    list($width_2, $height_2) = $size_2;

    // Load images
    $image_1 = @imagecreatefromjpeg($filename_1);
    $image_2 = @imagecreatefromjpeg($filename_2);
    if (!$image_1 || !$image_2) {
        if ($image_1) {
            @imagedestroy($image_1);
        }
        if ($image_2) {
            @imagedestroy($image_2);
        }
        return false;
    }

    // Create new image with desired dimensions and work out where the
    // second image goes on it.
    if ($direction === 'vert') {
        $image = imagecreatetruecolor($width_1, $height_1 + $height_2);
        $offset_x = 0;
        $offset_y = $height_1;
    } else {
        $image = imagecreatetruecolor($width_1 + $width_2, $height_1);
        $offset_x = $width_1;
        $offset_y = 0;
    }

    $saved = false;
    if ($image) {
        // Copy
        imagecopy($image, $image_1, 0, 0, 0, 0, $width_1, $height_1);
        imagecopy($image, $image_2, $offset_x, $offset_y, 0, 0, $width_2, $height_2);
        // Save the resulting image to disk (as JPEG)
        $saved = imagejpeg($image, $filename_result);
        @imagedestroy($image);
    }

    // Clean up
    @imagedestroy($image_1);
    @imagedestroy($image_2);

    return $saved;
}
/**
 * Gets the data from a URL, accepting JPEG responses only.
 *
 * The cURL handle is now closed on every path (the non-JPEG early return used
 * to skip curl_close()), and a failed transfer is detected before the content
 * type is inspected so that the cURL error message is actually printed.
 *
 * @param string $url Address to fetch.
 * @return string|null The raw response body when the response Content-Type
 *                     contains "image/jpeg" (matched case-insensitively);
 *                     NULL when the transfer failed or the response is not a
 *                     JPEG.
 */
function get_data($url) {
    $ch = curl_init();
    $timeout = 15;
    curl_setopt($ch,CURLOPT_USERAGENT,'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
    // NOTE(review): certificate verification is switched off here; kept as in
    // the original, but consider enabling it where a CA bundle is available.
    curl_setopt($ch,CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

    $data = curl_exec($ch);
    if ($data === false) {
        // Report the transport error while the handle is still open.
        echo 'Curl error: ' . curl_error($ch);
        curl_close($ch);
        return NULL;
    }

    $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
    curl_close($ch);

    // Anything that is not a JPEG (error pages, empty content type) counts as
    // "no data" so that callers can retry.
    if (!is_string($content_type) || stripos($content_type, "image/jpeg") === false) {
        return NULL;
    }
    return $data;
}
// ---------------------------------------------------------------------------
// Main script: for every book ID listed in bookids.txt, download the covers
// and pages tile by tile into tmpbooks/<bookid>/, merge the tiles of each
// page with mergeall(), and store the result as cover<N>.jpg / p<NNNN>.jpg.
// Books that cannot be found, or that keep failing to download, are moved to
// trash/; the latter are also appended to hang.txt.
//
// Fixes compared with the previous version:
//  * the retry loops now store the result of get_data() (it was discarded, so
//    a retry could never succeed);
//  * $cols / $rows are reset for every book instead of leaking from the
//    previous one;
//  * a cover whose first tile was not downloaded is no longer passed to
//    mergeall();
//  * failures to open bookids.txt / hang.txt no longer cascade into further
//    errors.
// ---------------------------------------------------------------------------
$ids = file('bookids.txt');
if ($ids === false) {
    // file() has already raised a warning; just iterate over nothing.
    $ids = array();
}
$hang = fopen('hang.txt',"a+"); // list of hanged-up and aborted books
foreach ($ids as $id) {
    $bookid = trim($id);
    if (!$bookid) continue;
    // A directory that already exists means the book was handled earlier.
    if (file_exists("tmpbooks/$bookid")) continue;
    @mkdir("tmpbooks/$bookid");
    echo "laster ned bok $bookid...<br>"; ob_flush();
    $covernotfound = 0;
    // Highest tile column/row index of a page; 0 means the page is one tile.
    $cols = 0;
    $rows = 0;
    // Check how many columns a page has
    for ($checkcol=0; $checkcol<=8;$checkcol++) {
        $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_0001&maxLevel=$maxlevel&level=$level&col=$checkcol&row=0&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
        $file_headers = get_headers($url);
        if (strstr($file_headers[0],"200")) {
            $cols = $checkcol;
        } else break;
    }
    // Check how many rows a page has
    for ($checkrow=0; $checkrow<=8;$checkrow++) {
        $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_0001&maxLevel=$maxlevel&level=$level&col=0&row=$checkrow&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
        $file_headers = get_headers($url);
        if (strstr($file_headers[0],"200")) {
            $rows = $checkrow;
        } else break;
    }
    if ($cols==0 and $rows==0) echo "sidene i boka er ikke delt.<br>";
    else { $nocols = $cols+1; $norows = $rows+1;
        echo "sidene i boka er delt i $nocols kolonne(r) og $norows rekke(r).<br>";
    }
    // Get covers
    for ($c = 1; $c <=3; $c++) {
        echo "henter cover $c...<br>"; ob_flush();
        for ($col = 0; $col <= $cols; $col++) {
            for ($row = 0; $row <= $rows; $row++) {
                $loop = 0;
                $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_C".$c."&maxLevel=$maxlevel&level=$level&col=$col&row=$row&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
                $file_headers = get_headers($url);
                if(strstr($file_headers[0],"404") or strstr($file_headers[0],"403")) {
                    $covernotfound++;
                    continue;
                }
                $data = get_data($url);
                while (!$data) {
                    if ($loop > 10) {
                        // Give up on the remaining tiles of this cover only;
                        // the book itself is still downloaded.
                        break 3;
                    }
                    $loop++;
                    $data = get_data($url);
                }
                file_put_contents("tmpbooks/$bookid/col$col"."row$row.jpg", $data);
            }
        }
        // Only assemble the cover when its first tile actually arrived;
        // otherwise there is nothing for mergeall() to start from.
        if (file_exists("tmpbooks/$bookid/col0row0.jpg")) {
            mergeall($cols, $rows);
            rename("tmpbooks/$bookid/colrow.jpg", "tmpbooks/$bookid/cover$c.jpg");
        }
        // Remove leftover tiles and row strips before the next cover.
        for ($row = 0; $row <= $rows; $row++) {
            for ($col = 0; $col <= $cols; $col++) {
                @unlink("tmpbooks/$bookid/col$col"."row$row.jpg");
            }
            @unlink("tmpbooks/$bookid/colrow$row.jpg");
        }
    }
    // Get pages
    for ($page = $frompage; $page <= $topage; $page++) {
        $curr_page = str_pad($page, 4, '0', STR_PAD_LEFT);
        echo "henter side $curr_page...<br>"; ob_flush();
        for ($col = 0; $col <= $cols; $col++) {
            for ($row = 0; $row <= $rows; $row++) {
                $loop = 0;
                $url = "https://www.nb.no/services/image/resolver?url_ver=geneza&urn=URN:NBN:no-nb_digibok_".$bookid."_".$curr_page."&maxLevel=$maxlevel&level=$level&col=$col&row=$row&resX=$resx&resY=$resy&tileWidth=$x&tileHeight=$y";
                $file_headers = get_headers($url);
                if(strstr($file_headers[0],"404") or strstr($file_headers[0],"403")) {
                    // A missing first page together with missing covers means
                    // the book does not exist; otherwise a 404/403 simply
                    // marks the end of the book.
                    if (($page == $frompage) and $covernotfound >= 2) {
                        rename("tmpbooks/$bookid", "trash/$bookid"."-rnd-".rand(100,999));
                        echo "fant ikke boka<br>"; ob_flush();
                    }
                    break 3; // leave the page loop
                }
                $data = get_data($url);
                while (!$data) {
                    if ($loop > 10) {
                        // Too many failed attempts: discard the whole book
                        // and remember it in hang.txt.
                        rename("tmpbooks/$bookid", "trash/$bookid"."-rnd-".rand(100,999));
                        if ($hang) {
                            fputs($hang, "$bookid\n");
                        }
                        echo "boka hang seg opp<br>"; ob_flush();
                        break 4; // leave the page loop
                    }
                    $loop++;
                    $data = get_data($url);
                }
                file_put_contents("tmpbooks/$bookid/col$col"."row$row.jpg", $data);
            }
        }
        mergeall($cols, $rows);
        rename("tmpbooks/$bookid/colrow.jpg", "tmpbooks/$bookid/p$curr_page.jpg");
    }
    // Remove tiles left behind by a page that was abandoned half-way.
    for ($row = 0; $row <= $rows; $row++) {
        for ($col = 0; $col <= $cols; $col++) {
            @unlink("tmpbooks/$bookid/col$col"."row$row.jpg");
        }
        @unlink("tmpbooks/$bookid/colrow$row.jpg");
    }
    echo "OK<p>";
}
if ($hang) {
    fclose($hang);
}
- ?>
Add Comment
Please, Sign In to add comment