Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
#
# Mirror one category of the Neverwinter Vault (nwvault.ign.com).
#
# For every vault id from the start id up to the newest entry of the
# category, the detail page is fetched and saved as index.html in a
# directory of its own, next to its comment pages, screenshots and
# downloads.
#
# usage: <script> <category> [vaultID to start on]

MAIN: {
    # Scoped to this block, so the pragmas only affect the code in it.
    use strict;
    use warnings;
    use File::Path ();

    # Package globals shared with the helper subs: downloads are stored in
    # $DIRNAME, and grab_screenshots() clears $images when it has run.
    our ($DIRNAME, $images);

    #my $BASE = "/media/disk/vpp";
    # A "~" is only expanded by a shell, never by Perl's open(), so the
    # home directory is resolved explicitly.
    my $home = (defined $ENV{HOME} && $ENV{HOME} ne "")
             ? $ENV{HOME}
             : (getpwuid($<))[7];
    my $BASE  = "$home/Projects/vaultscraper";
    my $SLEEP = 0;    # seconds to wait between two entries

    # [ command-line name, output directory / LatestAdditions cluster,
    #   name used in the "view=<Name>.Detail" URL ]
    my @categories = (
        [ 'characters',  'characters',     'Characters'     ],
        [ 'creatures',   'creatures',      'Creatures'      ],
        [ 'hakpaks',     'hakpaks',        'Hakpaks'        ],
        [ 'models',      'models',         'Models'         ],
        [ 'modules',     'modules',        'Modules'        ],
        [ 'movies',      'movies',         'Movies'         ],
        [ 'other',       'other',          'Other'          ],
        [ 'prefabs',     'prefabs',        'Prefabs'        ],
        [ 'portraits',   'portraits',      'Portraits'      ],
        [ 'screenshots', 'screenshots',    'Screenshots'    ],
        [ 'scripts',     'scripts',        'Scripts'        ],
        [ 'sounds',      'sounds',         'Sounds'         ],
        [ 'textures',    'textures',       'Textures'       ],
        [ 'news',        'community_news', 'Community_News' ],
        [ 'fanfiction',  'fanfiction',     'FanFiction'     ],
    );
    my %category_for = map { $_->[0] => $_ } @categories;
    my $usage = "usage: $0 <"
              . join('|', map { $_->[0] } @categories)
              . "> [vaultID to start on]\n";

    my $arg = shift @ARGV;
    die $usage unless defined $arg && exists $category_for{$arg};
    my (undef, $cluster, $view) = @{ $category_for{$arg} };

    my $OUTPUT   = "$BASE/$cluster";
    my $last_raw = get_last_entry($cluster);
    my ($LAST)   = defined $last_raw ? $last_raw =~ /^(\d+)/ : ();
    die "could not determine the last $arg entry\n" unless defined $LAST;
    print "lastentry: $LAST\n";

    my $url_prefix = "http://nwvault.ign.com/View.php?view=${view}.Detail&id=";

    my $START = shift @ARGV;
    $START = 1 unless defined $START && $START ne "";
    die $usage unless $START =~ /^\d+$/;

    # Turn an entry title into "$OUTPUT/<id>-<title>", create that directory
    # and make it the current download target ($DIRNAME).
    my $open_entry_dir = sub {
        my ($id, $title) = @_;
        $title =~ s/[^\w\s]//g;
        $title =~ s/ /_/g;
        $DIRNAME = "$OUTPUT/$id-$title";
        print "\nprocessing ($id) $title -> $DIRNAME\n";
        if (!eval { File::Path::mkpath($DIRNAME); 1 }) {
            warn "cannot create $DIRNAME: $@";
            $DIRNAME = "";    # nothing can be saved for this entry
        }
    };

    for (my $id = $START; $id <= $LAST; $id++) {
        $DIRNAME = "";
        $images  = 0;
        my $comments_fetched = 0;

        my $page_url = $url_prefix . $id;
        # Shell-escaped spelling of the same URL: used for the log line and
        # handed to get_next_page(), which has always been called with it.
        (my $shell_url = $page_url) =~ s/&/\\&/g;

        # curl is started without a shell, so the URL needs no quoting.
        my $page;
        if (open(my $curl, '-|', 'curl', '-s', $page_url)) {
            local $/;    # read the whole response at once
            $page = <$curl>;
            close($curl) or warn "curl failed for $page_url\n";
        }
        else {
            warn "cannot run curl: $!\n";
        }
        $page = "" unless defined $page;
        print "curl -s $shell_url\n";

        foreach my $line (split /\n/, $page) {
            # Only the first "<a href" line after the image marker (seen on
            # an earlier line) carries the screenshots.
            if ($images == 1 && $line =~ /<a href/) {
                grab_screenshots($line) if $DIRNAME ne "";
                $images = 0;
            }

            if ($arg eq "fanfiction") {
                if ($line =~ /<span class="pageheader">(.*?)<\/span><br>/) {
                    $open_entry_dir->($id, $1);
                }
                if ($line =~ /<a href="fms\/Image.php.*<img src="(http.*\/(.*?jpg))/) {
                    my ($img_url, $img) = ($1, $2);
                    # Both the file name and the URL must point at the full
                    # resolution picture, as in grab_screenshots().
                    s/_thumb/_fullres/ for $img_url, $img;
                    if ($DIRNAME ne "") {
                        print "grabbing image: $img\n";
                        system('wget', '--quiet', '-O', "$DIRNAME/$img", $img_url) == 0
                            or warn "wget failed for $img_url\n";
                    }
                }
            }

            if ($line =~ /<a href="\#Files" title=".*?>(.*?)<\/a>/) {
                $open_entry_dir->($id, $1);
            }

            if ($DIRNAME ne ""
                && $line =~ /<a href="(fms\/Download\.php.*?)".*?>(.*?)<span>/) {
                grab_downloads($line);
            }

            # The "Next>" link can show up more than once per page; the
            # comment pages are fetched a single time.
            if (!$comments_fetched
                && $line =~ /<A href="\/View.php.*" >Next><\/A>/) {
                $comments_fetched = 1;
                get_next_page($shell_url, 2) if $DIRNAME ne "";
            }

            if ($line =~ /-START OF IMAGE CODE-/) {
                $images = 1;
            }
        }

        if ($DIRNAME eq "") {
            warn "no title found for id $id, page not saved\n";
        }
        elsif (open(my $out, '>', "$DIRNAME/index.html")) {
            print {$out} $page;
            close($out) or warn "cannot finish writing $DIRNAME/index.html: $!\n";
        }
        else {
            warn "cannot write $DIRNAME/index.html: $!\n";
        }

        print "sleeping $SLEEP seconds\n";
        sleep($SLEEP);
    }
}
# Download the comment pages of a vault entry, starting at page $num.
#
#   $u   - URL of the entry's detail page.  The shell-escaped spelling
#          ("\&" instead of "&") is accepted as well as the plain one.
#   $num - number of the first comment page to fetch.
#
# Each page is saved as "$DIRNAME/index<num>.html".  As long as the page
# just fetched contains a "Next>" link, the following page is fetched too.
# Returns nothing.
sub get_next_page {
    use strict;
    use warnings;

    my ($u, $num) = @_;
    our $DIRNAME;    # directory of the entry currently being processed

    if (!defined $DIRNAME || $DIRNAME eq "") {
        warn "no entry directory, comment pages not fetched\n";
        return;
    }

    # curl is started without a shell, so undo any shell escaping.
    (my $base_url = $u) =~ s/\\&/&/g;

    # A loop with lexical state instead of recursion over globals: one
    # "Next>" link per page triggers exactly one further fetch.
    while (1) {
        print "fetching comments page: $num\n";

        my $html;
        if (open(my $curl, '-|', 'curl', '-s', "$base_url&comment_page=$num")) {
            local $/;    # read the whole response at once
            $html = <$curl>;
            close($curl) or warn "curl failed for comments page $num\n";
        }
        else {
            warn "cannot run curl: $!\n";
            return;
        }
        $html = "" unless defined $html;

        my $file = "$DIRNAME/index$num.html";
        if (open(my $out, '>', $file)) {
            print {$out} $html;
            close($out) or warn "cannot finish writing $file: $!\n";
        }
        else {
            warn "cannot write $file: $!\n";
        }

        # "." never matches a newline here, so this is the same test as
        # checking the page line by line.
        last unless $html =~ /<A href="\/View.php.*" >Next><\/A>/;
        $num++;
    }
    return;
}
# Download the full-resolution version of every screenshot referenced on
# one line of a vault page.
#
#   $imgline - the HTML line holding the thumbnails, one per "<p>" chunk.
#
# Files are stored in $DIRNAME.  The global $images flag is cleared so the
# main loop passes only one line to this sub per page.  Returns nothing.
sub grab_screenshots {
    use strict;
    use warnings;

    my ($imgline) = @_;
    our ($DIRNAME, $images);

    $images = 0;
    return unless defined $imgline;
    if (!defined $DIRNAME || $DIRNAME eq "") {
        warn "no entry directory, screenshots not fetched\n";
        return;
    }

    foreach my $chunk (split /<p>/, $imgline) {
        next unless $chunk =~ m{src="(http://vnmedia\.ign\.com/nwvault\.ign\.com/fms/images/.*?/.*?/(.*?))"};

        # Lexicals: the caller's own URL variable must not be overwritten.
        my ($url, $img) = ($1, $2);
        s/_thumb/_fullres/ for $url, $img;
        $img =~ s{.*/}{};    # keep the file name only
        next if $img eq "";

        print "grabbing image: $img\n";
        # List form: no shell sees the page-supplied URL or file name.
        system('wget', '--quiet', '-O', "$DIRNAME/$img", $url) == 0
            or warn "wget failed for $url\n";
    }
    return;
}
# Download every file linked through fms/Download.php on one line of a
# vault page.
#
#   $line - the HTML line holding the download table, one file per "<tr>".
#
# Each file is stored in $DIRNAME under the name shown as the link text.
# Returns nothing.
sub grab_downloads {
    use strict;
    use warnings;

    my ($line) = @_;
    our $DIRNAME;    # directory of the entry currently being processed

    return unless defined $line;
    if (!defined $DIRNAME || $DIRNAME eq "") {
        warn "no entry directory, downloads not fetched\n";
        return;
    }

    foreach my $row (split /<tr>/, $line) {
        next unless $row =~ /<a href="(fms\/Download\.php.*?)".*?>(.*?)<span>/;
        my ($path, $name) = ($1, $2);

        $path =~ s/&amp;/&/g;        # href values are HTML-encoded
        $name =~ s/<[^>]*>//g;       # drop markup inside the link text
        $name =~ s/^\s+|\s+$//g;
        $name =~ tr{/}{_};           # the name must stay inside $DIRNAME
        next if $name eq "";

        print "downloading: $name\n";
        # List form: "?", "&" and spaces coming from the page are passed
        # to wget verbatim instead of being interpreted by a shell.
        system('wget', '--quiet', '-O', "$DIRNAME/$name",
               "http://nwvault.ign.com/$path") == 0
            or warn "wget failed for $path\n";
    }
    return;
}
# Look up the id of the newest entry of a vault category.
#
#   $category - cluster name as used by the LatestAdditions view
#               (e.g. "characters", "community_news").
#
# Returns the id taken from the first entry link on the LatestAdditions
# page, or undef (in scalar context) when the page could not be fetched or
# contains no such link.
sub get_last_entry {
    use strict;
    use warnings;

    my ($category) = @_;
    return unless defined $category;

    my $url = "http://nwvault.ign.com/View.php?view=LatestAdditions"
            . "&clusters=$category&days=3000&show_days_back=1";

    # curl is started without a shell, so the "&"s need no escaping.
    my $html;
    if (open(my $curl, '-|', 'curl', '-s', $url)) {
        local $/;    # read the whole response at once
        $html = <$curl>;
        close($curl) or warn "curl failed for $url\n";
    }
    else {
        warn "cannot run curl: $!\n";
        return;
    }
    return unless defined $html;

    foreach my $line (split /\n/, $html) {
        # The first link found wins; its id is used as the upper bound.
        return $1 if $line =~ /<a href="View.php\?view=.*?&id=(.*?)"/;
    }
    return;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement