simpleruser

Untitled

Jun 20th, 2021
710
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  <?php
  /**
   * Downloads the HTML export of every work listed on an Archive of Our Own
   * (AO3) user's "works" pages.
   *
   * Side effects:
   *   - creates (if needed) and enters a directory named after $username;
   *   - writes one "<ficid>.html" file per downloaded work;
   *   - appends one tab-separated line (id, title, work URL) per work to
   *     "meta.txt", preceded by a header when the file is new or empty.
   *
   * The run is resumable: set $resumeFromFicId to the id of the last work
   * that was downloaded and earlier works in the listing are skipped.
   */

  // ---------------------------------------------------------------------
  // Configuration
  // ---------------------------------------------------------------------
  $username = "SET USERNAME HERE";

  // Work id at which downloading starts (that work itself is fetched again,
  // in case the previous run stopped halfway through it). Change this
  // manually to the most recent fic downloaded, or set it to null to
  // download everything from the first listed work.
  $resumeFromFicId = "228621";

  // Number of works shown per listing page; used only to derive the page count.
  $worksPerPage = 20;

  // Pause between listing pages, in seconds.
  $secondsBetweenPages = 10;

  $baseUrl = "https://archiveofourown.org/users/$username/works";

  /**
   * Logs a message and stops the script with a non-zero exit status.
   *
   * @param string $message Human-readable description of the failure.
   */
  function abortDownload($message)
  {
      error_log($message);
      exit(1);
  }

  // ---------------------------------------------------------------------
  // Output directory
  // ---------------------------------------------------------------------
  // The directory already exists on a resumed run, so only create it when
  // missing instead of letting mkdir() fail with a warning.
  if (!is_dir($username) && !mkdir($username)) {
      abortDownload("Could not create output directory '$username'.");
  }
  if (!chdir($username)) {
      abortDownload("Could not enter output directory '$username'.");
  }

  // ---------------------------------------------------------------------
  // Work count / page count
  // ---------------------------------------------------------------------
  $ao3Page = file_get_contents($baseUrl);
  if ($ao3Page === false) {
      abortDownload("Could not fetch $baseUrl");
  }

  $numWorks = [];
  if (!preg_match('/<li><span class=\\"current\\">Works \\((\\d+)\\)<\\/span><\\/li>/', $ao3Page, $numWorks)) {
      abortDownload("Could not find the total number of works on $baseUrl");
  }
  $totalWorks = (int) $numWorks[1];

  // Round up so a partially filled last page is included, without asking
  // for an extra page when the count is an exact multiple of the page size.
  $totalPages = (int) ceil($totalWorks / $worksPerPage);

  // ---------------------------------------------------------------------
  // Metadata file
  // ---------------------------------------------------------------------
  clearstatcache();
  $metaIsNew = !file_exists("meta.txt") || filesize("meta.txt") === 0;

  $metaFile = fopen("meta.txt", "a");
  if ($metaFile === false) {
      abortDownload("Could not open meta.txt for appending.");
  }
  if ($metaIsNew) {
      // Header is written once; resumed runs only append work lines.
      fwrite($metaFile, "Username: $username\t");
      fwrite($metaFile, "Total works: " . $totalWorks . "\n");
      fwrite($metaFile, "AO3 FIC ID\tFIC NAME\tFIC URL\n");
  }

  // ---------------------------------------------------------------------
  // Walk the listing pages and download each work
  // ---------------------------------------------------------------------
  // With no resume id configured, downloading starts at the first work.
  $reachedContinueFic = ($resumeFromFicId === null);

  for ($i = 1; $i <= $totalPages; $i++) {
      $pageUrl = $baseUrl . "?page=$i";
      $ao3Page = file_get_contents($pageUrl);

      if ($ao3Page === false) {
          error_log("Could not fetch listing page $pageUrl; skipping it.");
      } else {
          $ficURLs = [];
          preg_match_all('/<a href=\\"\\/works\\/(?<ficid>\\d+)">(?<ficname>.+)<\\/a>/', $ao3Page, $ficURLs);

          foreach ($ficURLs["ficid"] as $ficid) {
              if (!$reachedContinueFic) {
                  if ((string) $ficid !== (string) $resumeFromFicId) {
                      continue;
                  }
                  $reachedContinueFic = true;
              }

              $ficPageUrl = "https://archiveofourown.org/works/$ficid?view_adult=true&view_full_work=true";
              $ficPageContent = file_get_contents($ficPageUrl);
              if ($ficPageContent === false) {
                  error_log("Could not fetch work page $ficPageUrl; skipping work $ficid.");
                  continue;
              }

              // [^"]+ keeps the capture inside a single href attribute even
              // when several download links share one line.
              $downloadLink = [];
              if (!preg_match('/<a href=\\"(\\/downloads\\/[^"]+)\\">HTML<\\/a>/', $ficPageContent, $downloadLink)) {
                  // Without this guard the bare site root would be fetched
                  // and saved as if it were the work.
                  error_log("No HTML download link found on $ficPageUrl; skipping work $ficid.");
                  continue;
              }

              $ficHtml = file_get_contents("https://archiveofourown.org" . $downloadLink[1]);
              if ($ficHtml === false) {
                  error_log("Could not download HTML export for work $ficid; skipping it.");
                  continue;
              }
              file_put_contents($ficid . ".html", $ficHtml);

              // The title sits on its own line inside the heading element.
              $title = [];
              $ficTitle = "";
              if (preg_match('/<h2 class=\\"title heading\\">.*\\R\\s*(.+)\\s*\\R.*<\\/h2>/', $ficPageContent, $title)) {
                  // The greedy capture can keep trailing whitespace or "\r".
                  $ficTitle = trim($title[1]);
              } else {
                  error_log("Could not find the title of work $ficid; recording it with an empty title.");
              }
              print_r($title);
              fwrite($metaFile, "$ficid\t" . $ficTitle . "\t$ficPageUrl\n");
          }
      }

      sleep($secondsBetweenPages);
  }

  fclose($metaFile);

  ?>
RAW Paste Data