#!/usr/bin/perl
use strict;
use warnings;
use threads;
use threads::shared;
use Socket;
use Getopt::Std;
use Net::IP;
use LWP::UserAgent;
# Detect whether the optional String::Compare module is installed; it is
# only required for percentage-match mode (i.e. when no -s string is given).
my $compare_available = 0;
# Test eval's own return value (the trailing 1) rather than inspecting $@:
# $@ is a global that can be clobbered by nested evals or destructors, so
# checking it after the fact is fragile.
if (eval { require String::Compare; String::Compare->import(); 1 }) {
    $compare_available = 1;
}
# Record the start time so we can report total runtime at exit
my $starttime = time();
# Shared state for the worker threads:
#   $ip_queue     - count of IP addresses still to be fetched
#   %page_queue   - page bodies keyed by IP, awaiting checking
#   %header_queue - response header names keyed by IP (written/read under
#                   %page_queue's lock, never locked on its own)
#   $fetching / $checking - number of live retrieve / check threads
#   $next_ip      - next IP address to scan, held as a 32-bit integer
my ($ip_queue, %page_queue, %header_queue, $fetching, $checking, $next_ip) :shared;
$fetching = 0;
$checking = 0;
# Get command line options (dies on missing/invalid mandatory options)
my ($search_range, $search_url, $search_string, $omit_string, $http_timeout, $threads_retrieve, $threads_check, $verbose) = getparams();
# Seed the scan: first address of the range as a packed integer, and the
# total number of addresses as the remaining-work counter
$next_ip = unpack 'N', inet_aton($search_range->ip());
$ip_queue = $search_range->size()->numify();
# Split the search URL into protocol, hostname, and path so we can rebuild
# URLs that target each candidate IP while sending the original hostname
# in the Host header
$search_url =~ m/^(\w+)\:\/\/(.+?)\/(.*)$/ or die('Could not extract URL components');
my ($search_protocol, $search_hostname, $search_path) = ($1, $2, $3);
print "Protocol: $search_protocol; Hostname: $search_hostname; Path: $search_path\n" if ($verbose > 0);
# In percent-match mode (no -s) fetch the original page once up front as
# the reference copy that candidate pages are compared against.
# NOTE(review): a list assignment in scalar context evaluates to the number
# of elements on its right-hand side, so if retrieve_page() returns
# (undef, undef) on failure this "or die" can never fire and $search_page
# is silently undef - confirm retrieve_page's failure return value.
my $search_page;
unless ($search_string) {
($search_page) = retrieve_page($search_url) or die ('Could not retrieve search page');
}
# Spin up retriever threads; handles are not kept here - threads are
# reaped via threads->list() in the join loop below
print "Starting $threads_retrieve retrieve threads\n" if ($verbose > 0);
for (my $threadcount = 0; $threadcount < $threads_retrieve; $threadcount++) {
my $thread = threads->create('start_retrieve');
}
# Spin up check threads
print "Starting $threads_check check threads\n" if ($verbose > 0);
for (my $threadcount = 0; $threadcount < $threads_check; $threadcount++) {
my $thread = threads->create('start_check');
}
# Reap threads as they finish; while none are joinable, print queue status
# once a second in verbose mode (keys() here is in scalar context via the
# string concatenation, so it prints the entry count)
while (my @threads = threads->list()) {
my @joinable = threads->list(threads::joinable);
if (@joinable) {
foreach my $thread (@joinable) {
$thread->join();
}
} else {
print "IPs in fetch queue: ". $ip_queue ."; Pages in check queue: ". keys(%page_queue) ."\n" if ($verbose > 0);
sleep(1);
}
}
# All threads joined - report total runtime and exit
my $runtime = time() - $starttime;
print "Ran in $runtime seconds\n" if ($verbose > 0);
exit;
# Worker loop for a retriever thread: claims IP addresses from the shared
# counters, fetches the search path from each candidate host, and queues
# successful responses for the checker threads. Takes no arguments and
# returns nothing; communicates entirely through the shared globals.
sub start_retrieve {
    # ++/-- on a shared variable is not atomic across threads, so guard the
    # live-worker count with its lock (checkers read it to decide when to stop).
    { lock($fetching); $fetching++; }
    RETRIEVE: while (1) {
        # Claim the next IP address inside a locked block so two threads can
        # never grab the same address or double-decrement the counter.
        my $ip;
        {
            lock($next_ip);
            lock($ip_queue);
            # No addresses left to fetch: this worker is finished.
            last RETRIEVE unless $ip_queue;
            $ip_queue--;
            $ip = inet_ntoa(pack 'N', $next_ip);
            $next_ip++;
        }
        # Fetch the page from the candidate IP, passing the original hostname
        # so the Host header copes with name-based virtual hosting.
        my ($page, $headers) = retrieve_page($search_protocol ."://". $ip ."/". $search_path, $search_hostname);
        # Queue the page if we got one; %page_queue's lock also guards the
        # parallel %header_queue entry. Failed fetches are simply skipped.
        if ($page) {
            lock(%page_queue);
            $page_queue{$ip} = $page;
            $header_queue{$ip} = $headers;
            print "$ip retrieved and added to queue\n" if ($verbose > 1);
        } else {
            print "$ip retrieve failed\n" if ($verbose > 1);
        }
    }
    { lock($fetching); $fetching--; }
}
# Worker loop for a checker thread: drains fetched pages from the shared
# queue and reports matches, either by search string (-s, with optional -o
# header omission) or by percentage similarity against the reference page.
# Takes no arguments and returns nothing; exits once the queue is empty and
# no retriever threads remain.
sub start_check {
    # ++/-- on a shared variable is not atomic across threads, so guard the
    # live-worker count with its lock.
    { lock($checking); $checking++; }
    CHECK: while (1) {
        my ($ip, $page, $headers);
        my $wait = 0;
        # Grab one queued page inside a locked block; %page_queue's lock also
        # guards the parallel %header_queue entry.
        {
            lock(%page_queue);
            my @keys = keys(%page_queue);
            if (@keys) {
                $ip = shift(@keys);
                # delete() returns the removed value, so fetch-and-remove in one step.
                $page = delete($page_queue{$ip});
                $headers = delete($header_queue{$ip});
            } elsif ($ip_queue || $fetching) {
                # Queue is empty but more pages may still arrive: wait, don't quit.
                $wait = 1;
            } else {
                # Nothing queued and no retrievers left running: we're done.
                last CHECK;
            }
        }
        # Sleep outside the locked block so retrievers can refill the queue.
        if ($wait) {
            sleep(1);
            next CHECK;
        }
        # String-match mode: report pages containing the search string, unless
        # the omit string matches the response headers.
        if ($search_string) {
            if ($page =~ /$search_string/) {
                if ($omit_string && $headers =~ /$omit_string/) {
                    print "$ip matched string but omitted based on headers\n" if ($verbose > 1);
                } else {
                    print "$ip matched string\n";
                }
            } else {
                print "$ip did not match string\n" if ($verbose > 1);
            }
        }
        # Percent-match mode: report similarity to the reference page.
        else {
            my $percent = compare_pages($search_page, $page);
            print "$ip is a $percent\% match\n";
        }
    }
    { lock($checking); $checking--; }
}
# Print the usage summary; Getopt::Std calls this automatically when
# --help is passed on the command line.
sub HELP_MESSAGE {
    print <<"USAGE";
noclouds.pl command line options:
-u: url to match against
-i: ip range to search
-s: string to search for instead of percent matching, mandatory if String::Compare is not installed
-o: optionally omit results where the header matches the provided string, only works if used in conjunction with -s
-t: optional HTTP connection timeout
-r: optional retrieve thread count (default 128)
-c: optional check thread count (default 4)
-v: optional verbose mode (Current useful values 0-2)
USAGE
    exit;
}
# Parse command line options and return the run configuration. Dies when a
# mandatory option is missing or invalid.
# Returns: ($search_range, $search_url, $search_string, $omit_string,
#           $http_timeout, $threads_retrieve, $threads_check, $verbose)
sub getparams {
    my %opts;
    getopts('u:i:s:o:t:r:c:v:', \%opts);
    # Optional verbose mode (-v): higher values produce more output while scanning.
    my $verbose = $opts{'v'} || 0;
    # Target URL (-u) is mandatory.
    my $search_url = $opts{'u'} or die ("No target URL provided");
    # IP range to scan (-i); Net::IP->new() returns undef on a bad range.
    # Direct method call instead of the indirect object syntax "new Net::IP",
    # which Perl can misparse.
    my $search_range = Net::IP->new($opts{'i'}) or die ('No valid search range supplied');
    # Optional string (-s) to search for in target pages instead of percent matching.
    my $search_string = $opts{'s'};
    # Optional string (-o) matched against headers to omit results (only with -s).
    my $omit_string = $opts{'o'};
    # Optional timeout (-t) for HTTP requests.
    my $http_timeout = $opts{'t'};
    # Optional retrieve (-r) and check (-c) thread counts with sensible defaults.
    my $threads_retrieve = $opts{'r'} || 128;
    my $threads_check = $opts{'c'} || 4;
    # Announce the search mode; percentage matching requires String::Compare,
    # so without it a -s search string is mandatory.
    if ($search_string) {
        print "Searching for $search_string\n" if ($verbose > 0);
    } elsif ($compare_available) {
        print "Checking percentage match against $search_url\n" if ($verbose > 0);
    } else {
        die('String::Compare is not available, provide a search string with -s');
    }
    return ($search_range, $search_url, $search_string, $omit_string, $http_timeout, $threads_retrieve, $threads_check, $verbose);
}
# Retrieve a page by URL, optionally overriding the Host header to cope
# with name-based virtual hosting. On success returns (content, headers),
# where headers is a '; '-joined list of the response header field names.
# On failure returns the empty list, so both call styles work:
#   my ($page, $headers) = retrieve_page($url, $host);   # undefs on failure
#   my ($page) = retrieve_page($url) or die;             # dies on failure
sub retrieve_page {
    my ($url, $hostname) = @_;
    # Spoof Safari 6.0.4 user agent headers so the scan looks like normal traffic.
    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/536.29.13 (KHTML, like Gecko) Version/6.0.4 Safari/536.29.13');
    $ua->timeout($http_timeout) if ($http_timeout);
    my $req = HTTP::Request->new(GET => $url);
    # Set the Host header if requested to deal with name-based virtual hosting.
    $req->header(Host => $hostname) if ($hostname);
    my $res = $ua->request($req);
    # Bare "return" yields an empty list in list context. The original
    # returned (undef, undef), which made "my ($page) = retrieve_page(...)
    # or die" always succeed: a list assignment in scalar context evaluates
    # to the element count on the right (2), which is always true.
    return unless $res->is_success();
    # NOTE(review): only the header *names* are collected here, not their
    # values - the -o omit match therefore runs against field names only;
    # confirm that is the intended behavior.
    my $headers = join('; ', $res->header_field_names());
    return ($res->content(), $headers);
}
# Compare the reference copy of the search page against a retrieved page
# and return how similar they are as a percentage, or undef when there is
# no page content to compare.
sub compare_pages {
    my ($reference, $candidate) = @_;
    # No candidate content means the comparison is meaningless.
    return undef() unless $candidate;
    # String::Compare::compare() yields a 0..1 score; scale to a percentage.
    return compare($reference, $candidate) * 100;
}