Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- # HOSTS file to ABE rule converter
- # Updated 27/2/2014 with Bugfix (Thanks Miller at Stackoverflow!)
- use strict;
- use warnings;
- use Regexp::Assemble;
- use File::Slurp;
# printhelp - show the command-line usage summary on STDOUT and terminate
# the program with a success status (0).  Takes no arguments; never returns.
sub printhelp {
    my $usage_text = "Usage: hosts2abe.plx [hostsfile] [aberuleset]\nConvert a HOSTS file to ABE ruleset.\n";
    print STDOUT $usage_text;
    exit 0;
}
# printargerror - report an invalid command line and terminate the program
# with a failure status (1).  Takes no arguments; never returns.
#
# The diagnostic goes to STDERR (not STDOUT) so that it is still visible when
# the caller redirects or pipes the script's normal output.
sub printargerror {
    print STDERR "Wrong arguments, please type `hosts2abe.plx -h' to get help.\n";
    exit 1;
}
# --- Main script -----------------------------------------------------------
# Usage: hosts2abe.plx [hostsfile] [aberuleset]
# Reads the HOSTS file named by the first argument, reduces it to a minimal
# set of host names and writes a single ABE "Site <regex>" / "Deny INC" rule
# to the file named by the second argument.
printhelp()     if @ARGV == 0 || $ARGV[0] eq "-h";
printargerror() if @ARGV != 2;

my ($hosts_path, $abe_path) = @ARGV;

# With err_mode => "quiet", File::Slurp's read_file returns undef on failure.
# Test definedness rather than truth so that an empty (or "0") file is not
# misreported as an open failure.
my $hostsfile = read_file($hosts_path, err_mode => "quiet");
die "Cannot open input file $hosts_path : $!" unless defined $hostsfile;

print "Preliminarily processing the hosts file ...\n";
$hostsfile =~ s/#.*//mg;              # Strip comments
$hostsfile =~ s/(?:^\s+|\s+$)//mg;    # Strip unnecessary whitespace

# Strip the leading IP address of every line.  The first alternative covers
# IPv4; the second covers anything containing a colon, i.e. IPv6 including
# hex digits and an optional "%zone" suffix (e.g. "fe80::1%lo0").  Requiring
# a colon keeps ordinary host names made of hex letters from being eaten.
$hostsfile =~ s{
    ^ (?: [0-9.]+
        | [0-9A-Fa-f.]* : [0-9A-Fa-f:.]* (?: % \S+ )?
      ) \s+
}{}xmg;

# Every remaining whitespace separated element is a host name.  Splitting on
# ' ' discards leading whitespace and never yields empty strings, so blank
# lines need no special handling.
my @hostnames = split ' ', $hostsfile;

# Whitelisted hosts (and wildcard entries starting with '*').  The pattern is
# anchored on both sides so that only whole entries are dropped; an
# unanchored match would cut the tail off e.g. "ip6-localhost" and glue the
# remainder onto the following entry.
my $whitelist_re = qr/\A(?:localhost|360\.yahoo\.com|92x\.tumblr\.com|\*.*)\z/;
@hostnames = grep { $_ !~ $whitelist_re } @hostnames;

# Without any host there is nothing meaningful to put into a rule.
die "No host names found in $hosts_path\n" unless @hostnames;

# Generalise rules: drop a leading "www" / numbered / digit-prefixed label
# when what remains still looks like a full domain name.
print "Generalising rulesets with heuristics...\n";
for my $host (@hostnames) {
    $host =~ s/^(?:www|[a-z-]*[0-9.-]+|[0-9]+[a-z]+)\.(.{4,}\..{2,})/$1/;
}

# Remove redundancies: a host is redundant when one of its parent domains is
# listed too, because the generated regex already matches all subdomains.
# Looking parents up in a hash (instead of comparing neighbours in a sorted
# list) also catches cases where a name such as "example.com-x" sorts between
# "example.com" and "a.example.com", and it removes exact duplicates.
print "Removing redundancies...\n";
my %listed = map { $_ => 1 } @hostnames;
my @result =
    map  { scalar reverse $_ }
    sort                              # order by reversed name: groups by TLD/domain
    map  { scalar reverse $_ }
    grep {
        my $parent  = $_;
        my $covered = 0;
        # Walk up the tree: a.b.example.com -> b.example.com -> example.com -> com
        while ($parent =~ s/\A[^.]*\.//) {
            if ($listed{$parent}) {
                $covered = 1;
                last;
            }
        }
        !$covered;
    } keys %listed;

# Form regular expressions
print "Forming regexes...\n";
my $regex = Regexp::Assemble->new;
for my $host (@result) {
    # Escape everything that is not a plain host name character so that dots
    # (and any stray metacharacter from a malformed entry) match literally.
    (my $escaped = $host) =~ s/([^A-Za-z0-9_-])/\\$1/g;
    $regex->add("^https?://(?:[a-z0-9_-]+\\.)*" . $escaped . "/");
}

print "Writing to file...\n";
open(my $abe_fh, '>', $abe_path)
    or die "Cannot open output file $abe_path : $!\n";
# as_string yields the assembled pattern itself, without the "(?^:...)"
# wrapper that stringifying the compiled regex would add.
print {$abe_fh} "Site " . $regex->as_string . "\nDeny INC"
    or die "Cannot write to output file $abe_path : $!\n";
# Buffered write errors only surface on close, so check it as well.
close($abe_fh)
    or die "Cannot close output file $abe_path : $!\n";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement