IWBH_01

wayback machine indexer

Oct 8th, 2020 (edited)
188
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 3.09 KB | None | 0 0
  1. <?php
  2. /* Open-source Wayback Machine indexer.
  3. Depends on the Wayback CDX server API:
  4. https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server
  5.  
  6.  
  7. //maker:
  8. $fi=fopen("wayback.index.php","w");
  9. fwrite($fi,file_get_contents("https://pastebin.com/raw/pEtsC7zE"));
  10. fclose($fi);
  11.  
  12. echo "did?";
  13.  
  14. //or:
  15. if(isset($_GET["a2"])&&isset($_GET["name"])){
  16. $fi=fopen($_GET["name"],"w");
  17. fwrite($fi,"http".$_GET["a2"]);
  18. fclose($fi);
  19.  
  20. echo "did?";
  21. }elseif(isset($_GET["bak"])){
  22.  echo file_get_contents($_GET["bak"]);
  23. }else
  24.     echo "no url?";
  25. //and:
  26. ?a2=https://pastebin.com/raw/pEtsC7zE&name=wayback.index.php
  27.  
  28. */
  29.  
  // Shared HTML prologue. It ends inside an open <title> element, so every page
  // appends its own title text followed by "</title></head>...".
  $html='<!Doctype html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>';

  // Listing mode: when the script is requested with the extra path
  // "/itall_ind.html", print a link to every *.html file found in "./" and stop.
  if(isset($_SERVER["PATH_INFO"])&&$_SERVER["PATH_INFO"]==="/itall_ind.html"){
    $dats=scandir("./");
    if($dats===false){
      // Directory could not be read: show an empty listing instead of
      // handing false to foreach.
      $dats=array();
    }
    $large="";
    foreach($dats as $fn){
      if(substr($fn,-5)===".html"){
        // Any *.html file in the directory is listed, not only the ones this
        // script saved, so the name is encoded for the URL and escaped for the
        // link text. Names produced by this script pass through unchanged.
        $large.='<a href="/./'.rawurlencode($fn).'">'.htmlspecialchars($fn,ENT_QUOTES,"UTF-8")."</a><br>\r\n";
      }
    }
    echo $html."all</title></head><body><br>\r\n".$large."\r\n</body></html>";
    exit();
  }
  43.  
  // Request parameters. Each value is percent-encoded here because it is later
  // concatenated into the CDX query string (and into the "Next page" link);
  // without encoding, a "&", "#" or space in a value would break the query or
  // smuggle in extra parameters. Non-string values (e.g. ?url[]=x) are rejected.

  // "url" is mandatory: the URL or pattern to look up.
  $url="";
  if(isset($_GET["url"])&&is_string($_GET["url"])){
    $url=urlencode($_GET["url"]);
  }else{
    // SCRIPT_URL is an Apache mod_rewrite variable and is not set by every
    // server, so fall back to SCRIPT_NAME for the usage hint.
    $self=isset($_SERVER["SCRIPT_URL"])?$_SERVER["SCRIPT_URL"]:(isset($_SERVER["SCRIPT_NAME"])?$_SERVER["SCRIPT_NAME"]:"");
    exit("error, no url param. make like so:<br>\r\n".$self."?url=pastebin.com/*&from=20191018");
  }

  // Optional "resumeKey": continues a previous, truncated result page.
  // The "Next page" link carries the key with "+" written as "%2B", so after
  // PHP's decoding a literal "+" here stands for the key's original "+"
  // (an encoded space). Mapping it back to a space before urlencode()
  // reproduces the key in the form the CDX server printed it.
  if(isset($_GET["resumeKey"])&&is_string($_GET["resumeKey"])){
    $hrk="&resumeKey=".urlencode(str_replace("+"," ",$_GET["resumeKey"]));
  }else{
    $hrk="";
  }

  // Optional "from" / "to" bounds, forwarded as given (e.g. from=20191018).
  $date="";
  if(isset($_GET["from"])&&is_string($_GET["from"])){
    $date.="&from=".urlencode($_GET["from"]);
  }

  if(isset($_GET["to"])&&is_string($_GET["to"])){
    $date.="&to=".urlencode($_GET["to"]);
  }
  62.  
  63.  
  64.  
  // Query the Wayback CDX endpoint: at most 2500 rows, only captures with
  // status 200 or 206, collapsed on urlkey, optionally bounded by $date and
  // continued from $hrk. showResumeKey=true asks the server to append a
  // resume key when the result was cut off.
  $dat=file_get_contents("https://web.archive.org/cdx/search/cdx?url=".$url."&limit=2500&filter=statuscode:(200|206)&collapse=urlkey".$date."&showResumeKey=true".$hrk);

  if($dat===false){
    // A failed request must stop here; otherwise the script would carry on
    // with no rows, save an empty index page and still add it to the sitemap.
    http_response_code(502);
    exit("error, could not fetch the index from web.archive.org");
  }

  // Normalise every line-ending flavour to "\n" (replacements are applied in
  // this order over the whole string), then split into lines.
  $dat=str_replace(array("\r\n","\n\r","\r"),"\n",$dat);

  $lines=explode("\n",$dat);
  72.  
  73.  
  /**
   * Turn CDX response lines into HTML.
   *
   * A line with more than two space-separated fields is treated as a capture
   * row: field 1 is used as the timestamp and field 2 as the original URL, and
   * the row becomes a link to https://web.archive.org/web/<timestamp>/<url>.
   * A non-empty line consisting of a single field is treated as the resume key
   * and becomes a "Next page" link back to this script. Other lines are skipped.
   *
   * Everything that comes from the CDX response or from the request is
   * HTML-escaped, because archived URLs may contain quotes, "<" or "&".
   *
   * @param array  $lines     CDX response split into lines
   * @param string $scriptUrl path of this script, used for the "Next page" link
   * @param string $url       value for the link's "url" parameter
   * @param string $date      extra "&from=...&to=..." query fragment (may be "")
   * @return string HTML fragment
   */
  function wbi_render_cdx_lines($lines,$scriptUrl,$url,$date){
    $html="";
    foreach($lines as $line){
      $parts=explode(" ", $line);
      $cp=count($parts);
      if($cp>2){
        $stamp=htmlspecialchars($parts[1],ENT_QUOTES,"UTF-8");
        $original=htmlspecialchars($parts[2],ENT_QUOTES,"UTF-8");
        $html.='<a href="https://web.archive.org/web/'.$stamp.'/'.$original.'" >'.$original.'</a> '.$stamp."<br>\r\n";
      }elseif($cp==1&&$parts[0]){
        // "+" in the key is rewritten to "%2B" so that it reaches this script
        // as a literal "+" rather than being decoded to a space.
        $next=$scriptUrl.'?url='.$url.$date.'&resumeKey='.str_replace("+","%2B",$parts[0]);
        $html.="<br>\r\n".'<a href="'.htmlspecialchars($next,ENT_QUOTES,"UTF-8").'" > Next page </a>';
      }
    }
    return $html;
  }

  // SCRIPT_URL is an Apache mod_rewrite variable and is not set by every
  // server, so fall back to SCRIPT_NAME for the self-link.
  $scriptUrl=isset($_SERVER["SCRIPT_URL"])?$_SERVER["SCRIPT_URL"]:(isset($_SERVER["SCRIPT_NAME"])?$_SERVER["SCRIPT_NAME"]:"");

  $outp=wbi_render_cdx_lines($lines,$scriptUrl,$url,$date);
  87.  
  88.  
  // Build the name of the saved copy from the query string: "/" becomes "_",
  // every other character outside [a-zA-Z0-9_.] becomes "-".
  $t2=str_replace("/","_",isset($_SERVER["QUERY_STRING"])?$_SERVER["QUERY_STRING"]:"");
  $t2=preg_replace("/[^a-zA-Z0-9_\.]/","-",$t2);

  // Query strings (long URLs, resume keys) can be far longer than a file name
  // may be; cap the variable part so prefix + name + "_" + hex time + ".html"
  // stays below the usual 255-byte limit. The hex timestamp keeps names distinct.
  $t2=substr($t2,0,200);

  $title="wayback.index.".$t2."_".dechex(time()).".html";

  // Complete page: shared prologue, the file name as a heading line, the rows.
  $outp2=$html."wayback machine index</title></head><body>\r\n<br>".$title."<br>\r\n".$outp."<br><br></body></html>";

  // Saving is best-effort. If the file cannot be opened the page is still shown
  // to the visitor; calling fwrite() on a failed handle would be a fatal
  // TypeError on PHP 8.
  $fi2=fopen($title,"w");
  if($fi2!==false){
    fwrite($fi2,$outp2);
    fclose($fi2);
  }
  99.  
  $nl="\n";

  // Record the saved page in wayback.index.sitemap.xml by inserting a <url>
  // entry directly before the closing </urlset> tag.
  $smfile="wayback.index.sitemap.xml";
  $smempty='<?xml version="1.0" encoding="UTF-8"?>'.$nl.'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.$nl.'</urlset>';

  if(file_exists($smfile))
    $smdat=file_get_contents($smfile);
  else
    $smdat=$smempty;

  // An unreadable or blank file is treated like a missing one. The previous
  // fixed search offset raised an error on files shorter than that offset.
  if($smdat===false||trim($smdat)==="")
    $smdat=$smempty;

  // Last closing tag, matched case-insensitively.
  $eue=strripos($smdat,"</urlset>");

  if($eue!==false){
    // NOTE(review): the sitemap protocol expects <loc> to be an absolute URL;
    // $title is only a file name. The public base URL is not known here.
    $haz=substr($smdat,0,$eue).'<url><loc>'.$title.'</loc><lastmod>'.date(DATE_ATOM,time()).'</lastmod></url>'.$nl.substr($smdat,$eue);

    $fi3=fopen($smfile,"w");
    if($fi3!==false){
      fwrite($fi3,$haz);
      fclose($fi3);
    }
  }
  // else: the file has content but no closing tag. Leave it untouched rather
  // than writing the entry in front of the XML declaration.

  // Finally send the generated page to the visitor.
  echo $outp2;
  ?>
Add Comment
Please, Sign In to add comment