Advertisement
Guest User

Untitled

a guest
Jun 29th, 2013
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.20 KB | None | 0 0
  #!/usr/bin/perl
  #
  # Scraper for nwvault.ign.com. For one content category it walks the entry
  # ids from a starting id up to the newest entry and saves each detail page
  # together with its images, downloads and comment pages.
  #
  # usage: <script> <category> [vaultID to start on]
  #
  # This first section only fills in the package globals that the rest of the
  # script reads: $BASE, $DIRNAME, $SLEEP, $arg, $OUTPUT, $LAST and $URL.

  #$BASE = "/media/disk/vpp";
  # "~" is only expanded by a shell; Perl's open() takes it literally, so the
  # home directory is resolved here instead.
  $BASE = (defined $ENV{HOME} && length $ENV{HOME} ? $ENV{HOME} : "~")
        . "/Projects/vaultscraper/";
  $DIRNAME = "";    # output directory of the entry currently being processed
  $SLEEP = 0;       # seconds to pause between two entries

  # One row per accepted category argument:
  #   [ argument, directory / LatestAdditions cluster name, view name in URL ]
  # Kept as an ordered list so the usage message is generated from the same
  # data and cannot drift from the accepted arguments.
  my @categories = (
      [ 'characters',  'characters',     'Characters'     ],
      [ 'creatures',   'creatures',      'Creatures'      ],
      [ 'hakpaks',     'hakpaks',        'Hakpaks'        ],
      [ 'models',      'models',         'Models'         ],
      [ 'modules',     'modules',        'Modules'        ],
      [ 'movies',      'movies',         'Movies'         ],
      [ 'other',       'other',          'Other'          ],
      [ 'prefabs',     'prefabs',        'Prefabs'        ],
      [ 'portraits',   'portraits',      'Portraits'      ],
      [ 'screenshots', 'screenshots',    'Screenshots'    ],
      [ 'scripts',     'scripts',        'Scripts'        ],
      [ 'sounds',      'sounds',         'Sounds'         ],
      [ 'textures',    'textures',       'Textures'       ],
      [ 'news',        'community_news', 'Community_News' ],
      [ 'fanfiction',  'fanfiction',     'FanFiction'     ],
  );
  my %category_for = map { $_->[0] => $_ } @categories;

  $arg = shift;
  if (defined $arg && exists $category_for{$arg}) {
      my $entry = $category_for{$arg};
      my ($dir, $view) = @{$entry}[1, 2];

      $OUTPUT = "$BASE/$dir";
      $LAST = get_last_entry($dir);
      print "lastentry: $LAST\n";
      # The "&" stays backslash-escaped because the URL is later handed to
      # curl through a shell command line.
      $URL = "http://nwvault.ign.com/View.php?view=${view}.Detail\\&id=";
  }
  else {
      die "usage: $0 <"
        . join('|', map { $_->[0] } @categories)
        . "> [vaultID to start on]\n";
  }
  89.  
  # ---- Main scrape loop ----------------------------------------------------
  #
  # Walks the entry ids from the optional start id given on the command line
  # (default 1) up to $LAST. For every id the detail page is fetched, a
  # per-entry directory is created from the project title, images, downloads
  # and comment pages are saved through the helper subs, and the page itself
  # is stored as index.html.
  #
  # Strictures are confined to this block because the helper subs elsewhere
  # in the file communicate through package globals ($DIRNAME, $images).
  {
      use strict;
      use warnings;
      use File::Path qw(mkpath);

      our ($arg, $OUTPUT, $LAST, $URL, $SLEEP, $DIRNAME, $images);

      # Build the directory name for one entry, create the directory and
      # return its path. The title is reduced to word characters, with every
      # whitespace character turned into "_" so the path stays a single word
      # for code that still passes it through a shell.
      sub _enter_project_dir {
          my ($root, $id, $title) = @_;
          $title =~ s/[^\w\s]//g;
          $title =~ s/\s/_/g;
          my $dir = $root . "/" . $id . "-" . $title;
          print "\nprocessing ($id) $title -> $dir\n";
          eval { mkpath($dir); 1 } or warn "cannot create $dir: $@";
          return $dir;
      }

      # open() and mkpath() do not expand "~", so resolve it once here.
      my $out_root = defined $OUTPUT ? $OUTPUT : "";
      $out_root =~ s{\A~(?=/|\z)}{$ENV{HOME}} if defined $ENV{HOME};

      my $start = shift @ARGV;
      $start = 1 if !defined $start || $start eq "";
      # A non-numeric id would be "incremented" as a string and never reach
      # the end of the range.
      die "vaultID to start on must be a number, got \"$start\"\n"
          unless $start =~ /\A\d+\z/;

      # Same value the numeric comparison would see; 0 when nothing was found.
      my $last_id = (defined $LAST && $LAST =~ /\A(\d+)/) ? $1 : 0;

      for my $id ($start .. $last_id) {
          $DIRNAME = "";
          $images  = 0;

          # Kept in a lexical: the helper subs overwrite the global $url, and
          # this value is still needed for the comment pages further down.
          # $URL is built by this script with "&" already shell-escaped.
          my $page_url = $URL . $id;
          my $page = `curl -s $page_url`;
          $page = "" unless defined $page;

          print "curl -s $page_url\n";

          my $comments_done = 0;
          for my $line (split /\n/, $page) {
              # Only the first link line after the image marker is handed
              # over; grab_screenshots() resets $images to 0.
              if ($images == 1 && $line =~ /<a href/) {
                  grab_screenshots($line);
              }

              if ($arg eq "fanfiction") {
                  if ($line =~ /<span class="pageheader">(.*?)<\/span><br>/) {
                      $DIRNAME = _enter_project_dir($out_root, $id, $1);
                  }
                  if ($line =~ /<a href="fms\/Image.php.*<img src="(http.*\/(.*?jpg))/) {
                      my ($img_url, $img) = ($1, $2);
                      $img     =~ s/_thumb/_fullres/;
                      # The original rewrote $img twice and never the URL, so
                      # the thumbnail was stored under the full-resolution
                      # name; grab_screenshots() rewrites both the same way.
                      $img_url =~ s/_thumb/_fullres/;
                      if ($DIRNAME eq "") {
                          warn "no output directory yet, skipping image $img\n";
                      }
                      else {
                          print "grabbing image: $img\n";
                          # List form: the scraped name and URL never reach a shell.
                          system('wget', '--quiet', '-O', "$DIRNAME/$img", $img_url) == 0
                              or warn "wget failed for $img_url\n";
                      }
                  }
              }

              if ($line =~ /<a href="\#Files" title=".*?>(.*?)<\/a>/) {
                  $DIRNAME = _enter_project_dir($out_root, $id, $1);
              }

              if ($line =~ /<a href="(fms\/Download\.php.*?)".*?>(.*?)<span>/) {
                  grab_downloads($line);
              }

              # Follow the comment pagination once per entry, starting at page 2.
              if (!$comments_done && $line =~ /<A href="\/View.php.*" >Next&gt;<\/A>/) {
                  $comments_done = 1;
                  get_next_page($page_url, 2);
              }

              if ($line =~ /-START OF IMAGE CODE-/) {
                  $images = 1;
              }
          }

          if ($DIRNAME eq "") {
              # No project header was found, so there is no directory to write to.
              print "no project found for id $id, page not saved\n";
          }
          elsif (open(my $out, '>', "$DIRNAME/index.html")) {
              print {$out} $page;
              close($out) or warn "cannot finish writing $DIRNAME/index.html: $!\n";
          }
          else {
              warn "cannot write $DIRNAME/index.html: $!\n";
          }

          print "sleeping $SLEEP seconds\n";
          sleep($SLEEP);
      }
  }
  164.  
  165.  
  # Fetch and save the follow-up comment pages of one vault entry.
  #
  # Arguments:
  #   $u   - URL of the entry's detail page. Callers in this script pass it
  #          with "&" backslash-escaped for a shell; that escaping is removed
  #          here, so an unescaped URL is accepted as well.
  #   $num - number of the first comment page to fetch.
  #
  # Every fetched page is written to "<$DIRNAME>/index<num>.html", where
  # $DIRNAME is the package global holding the current entry's directory.
  # Pages are followed for as long as the page just fetched contains a
  # "Next>" link. Returns nothing.
  sub get_next_page {
      use strict;
      use warnings;
      our $DIRNAME;
      my ($u, $num) = @_;

      # curl is started without a shell below, so undo the shell escaping and
      # expand a leading "~" in the output directory ourselves.
      (my $base_url = $u) =~ s/\\&/&/g;
      my $dir = defined $DIRNAME ? $DIRNAME : "";
      $dir =~ s{\A~(?=/|\z)}{$ENV{HOME}} if defined $ENV{HOME};

      # A loop with lexicals replaces the original recursion over package
      # globals, where each nested call overwrote its caller's state.
      while (1) {
          print "fetching comments page: $num\n";
          my $page_url = $base_url . "&comment_page=$num";

          my $p = "";
          if (open(my $curl, '-|', 'curl', '-s', $page_url)) {
              local $/;    # read the whole response at once
              $p = <$curl>;
              $p = "" unless defined $p;
              close($curl);
          }
          else {
              warn "cannot run curl for $page_url: $!\n";
          }

          my $file = "$dir/index$num.html";
          if (open(my $out, '>', $file)) {
              print {$out} $p;
              close($out) or warn "cannot finish writing $file: $!\n";
          }
          else {
              warn "cannot write $file: $!\n";
          }

          # "." does not cross newlines, so matching the whole page equals
          # testing it line by line; one match is enough to continue.
          last unless $p =~ /<A href="\/View.php.*" >Next&gt;<\/A>/;
          $num++;
      }
      return;
  }
  186.  
  # Download the full-resolution version of every screenshot thumbnail found
  # on one line of a detail page.
  #
  # Argument: the HTML line holding the thumbnails. It is split on "<p>" and
  # each chunk is searched for a vnmedia.ign.com image URL; "_thumb" is
  # replaced by "_fullres" in both the URL and the local file name.
  #
  # Side effect the caller relies on: the package global $images is reset to
  # 0, so only the first link line after the image marker is processed.
  # Files are stored in the directory named by the package global $DIRNAME.
  # Returns nothing.
  sub grab_screenshots {
      use strict;
      use warnings;
      our ($images, $DIRNAME);
      $images = 0;
      my ($imgline) = @_;

      # wget is started without a shell, so expand a leading "~" ourselves.
      my $dir = defined $DIRNAME ? $DIRNAME : "";
      $dir =~ s{\A~(?=/|\z)}{$ENV{HOME}} if defined $ENV{HOME};

      foreach my $chunk (split /<p>/, $imgline) {
          next unless $chunk =~ m{src="(http://vnmedia.ign.com/nwvault.ign.com/fms/images/.*?/.*?/(.*?))"};

          # Lexicals: the original stored these in the globals $url and $img,
          # overwriting the page URL the main loop still needs afterwards.
          my ($img_url, $img) = ($1, $2);
          $img     =~ s/_thumb/_fullres/;
          $img_url =~ s/_thumb/_fullres/;

          # The name comes from the remote page: keep only its last path
          # component so it cannot point outside the entry directory.
          $img =~ s{.*/}{};
          next if $img eq "" || $img eq "." || $img eq "..";

          print "grabbing image: $img\n";
          # List form: neither the name nor the URL is parsed by a shell.
          system('wget', '--quiet', '-O', "$dir/$img", $img_url) == 0
              or warn "wget failed for $img_url\n";
      }
      return;
  }
  205.  
  # Download every file linked from one line of a detail page.
  #
  # Argument: the HTML line holding the download table. It is split on "<tr>"
  # and each row is searched for an "fms/Download.php..." link; the link text
  # in front of "<span>" is used as the local file name.
  #
  # Files are stored in the directory named by the package global $DIRNAME.
  # Returns nothing.
  sub grab_downloads {
      use strict;
      use warnings;
      our $DIRNAME;
      my ($line) = @_;

      # wget is started without a shell, so expand a leading "~" ourselves.
      my $dir = defined $DIRNAME ? $DIRNAME : "";
      $dir =~ s{\A~(?=/|\z)}{$ENV{HOME}} if defined $ENV{HOME};

      foreach my $row (split /<tr>/, $line) {
          next unless $row =~ /<a href="(fms\/Download\.php.*?)".*?>(.*?)<span>/;
          my ($path, $name) = ($1, $2);

          print "downloading: $name\n";

          # Both values come from the remote page. Make the name a plain file
          # name inside the entry directory before using it.
          (my $file = $name) =~ s/\A\s+|\s+\z//g;
          $file =~ s{[/\\]}{_}g;
          if ($file eq "" || $file eq "." || $file eq "..") {
              warn "skipping download with unusable name \"$name\"\n";
              next;
          }

          # List form: an "&" in the href or a space in the name no longer
          # breaks or extends the command line.
          system('wget', '--quiet', '-O', "$dir/$file", "http://nwvault.ign.com/$path") == 0
              or warn "wget failed for http://nwvault.ign.com/$path\n";
      }
      return;
  }
  217.  
  # Look up the id of the newest entry of one category.
  #
  # Argument: the cluster name used by the site's LatestAdditions view
  # (for example "characters" or "community_news").
  #
  # Fetches the LatestAdditions listing and returns the id taken from the
  # first 'View.php?view=...&id=...' link on it. Returns undef (in scalar
  # context) when the listing cannot be fetched or contains no such link.
  sub get_last_entry {
      use strict;
      use warnings;
      my ($category) = @_;

      # curl is started without a shell, so the "&" needs no escaping and the
      # category is never parsed as shell syntax.
      my $url = "http://nwvault.ign.com/View.php?view=LatestAdditions&clusters="
              . $category
              . "&days=3000&show_days_back=1";

      my $listing = "";
      if (open(my $curl, '-|', 'curl', '-s', $url)) {
          local $/;    # read the whole response at once
          $listing = <$curl>;
          $listing = "" unless defined $listing;
          close($curl);
      }
      else {
          warn "cannot run curl for $url: $!\n";
          return;
      }

      # Lexicals only: the original used the globals @lines, $l, $url and $p,
      # and returned a global $max that could be left over from an earlier call.
      foreach my $line (split /\n/, $listing) {
          return $1 if $line =~ /<a href="View.php\?view=.*?&id=(.*?)"/;
      }
      return;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement