Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env perl
- use strict;
- use Data::Dumper;
- use DBI;
- use warnings;
- use Proc::Queue;
- use POSIX ":sys_wait_h";
- use List::MoreUtils qw(uniq);
- # constants which will be passed through cli
- use constant TMP_DIR => $ARGV[0];
- use constant SOURCE_FILE_PATH => TMP_DIR . '/' . $ARGV[1];
- use constant MYSQL_HOST => $ARGV[2];
- use constant MYSQL_DB => $ARGV[3];
- use constant MYSQL_USER => $ARGV[4];
- use constant MYSQL_PASSWORD => $ARGV[5];
- use constant ZONE_NAME => $ARGV[6];
- use constant OUTPUT_FILE => TMP_DIR . '/' . ZONE_NAME . '_out.txt';
- use constant MYSQL_PER_INSERT => 250;
- use constant THREADS_AMOUNT => 16;
- use constant MAX_PROCESSES_AMOUNT => 30;
- use constant DEBUG => 1;
- Proc::Queue::size(MAX_PROCESSES_AMOUNT);
- Proc::Queue::trace(0); # trace mode on - 0
- Proc::Queue::debug(0);
# Build the prefix placed in front of every log line:
#   "[YYYY-MM-DD HH:MM:SS][<zone name>] "
# The timestamp is the local time at the moment of the call.
sub get_log_time {
    my @now = localtime(time);

    my $stamp = sprintf(
        "%04d-%02d-%02d %02d:%02d:%02d",
        $now[5] + 1900,      # localtime counts years from 1900
        $now[4] + 1,         # localtime months are zero-based
        @now[3, 2, 1, 0],    # day of month, hour, minute, second
    );

    return join('', '[', $stamp, '][', ZONE_NAME, '] ');
}
# Regex source string => provider id; filled in from the providers table.
my %providers;

# Open a new MySQL connection using the host, database and credentials from
# the command-line constants. RaiseError is switched on for the handle, so
# DBI errors are raised as exceptions instead of being returned.
sub get_connection {
    my $dsn   = sprintf('DBI:mysql:database=%s;host=%s', MYSQL_DB, MYSQL_HOST);
    my %attrs = ('RaiseError' => 1);

    return DBI->connect($dsn, MYSQL_USER, MYSQL_PASSWORD, \%attrs);
}
# Load the provider masks from the `providers` table and convert each one
# into a regex source string that maps to the provider's id.
my $dbh = get_connection();
my $sth = $dbh->prepare("SELECT id, mask FROM providers");
$sth->execute();
while (my $ref = $sth->fetchrow_hashref()) {
    my $mask = $ref->{'mask'};

    # A NULL or empty mask would produce an empty pattern, which cannot
    # identify a provider; skip it.
    next unless defined $mask && length $mask;

    # '*' is the only wildcard and becomes a lazy "(.*?)" group. Everything
    # else is escaped so that e.g. the dots in "*.example.com" match literal
    # dots only and stray metacharacters cannot yield an invalid regex.
    # NOTE(review): this assumes masks are plain wildcard strings, not
    # hand-written regexes - confirm against the table contents.
    my $pattern = join '(.*?)', map { quotemeta($_) } split(/\*/, $mask, -1);

    $providers{$pattern} = $ref->{'id'};
}
$sth->finish();
$dbh->disconnect();

my $total_providers = keys %providers;
print get_log_time() . "Total providers found: $total_providers\n" if DEBUG;
# Read the zone file and group host names by the name server they delegate
# to. %ns_hash maps a name server to its hosts, stored as one string in which
# every host is followed by "::".
#
# Three-argument open with a lexical handle: the path comes from the command
# line, so it must never be interpreted as an open mode.
open(my $zone_fh, '<', SOURCE_FILE_PATH) or die "Can't open file: $!";
my $ns_entries = 0;
my %ns_hash;
my $read_lines;
while (my $tmp = <$zone_fh>) {
    chomp($tmp);

    my ($host, $ns_server);
    if ($tmp =~ /^(\S*)\.\s+\d+\s+\S+\s+NS\s+(\S*)\./) {
        # Fully qualified owner followed by a numeric field and one more
        # field before "NS" (presumably TTL and class): "host. 3600 IN NS ns."
        ($host, $ns_server) = ($1, $2);
    }
    elsif ($tmp =~ /^(\S*)\s+NS\s+(\S*)\./) {
        # Owner written relative to the zone: "host NS ns." - qualify it
        # with the zone name.
        ($host, $ns_server) = ($1 . '.' . ZONE_NAME, $2);
    }
    else {
        next;    # not an NS record in either supported layout
    }

    $ns_entries++;
    $ns_hash{$ns_server} .= $host . "::";
    # last if (++$read_lines == 1000000) and DEBUG;
}
print get_log_time() . "Total NS records in file: $ns_entries\n" if DEBUG;
close($zone_fh);
# Assign every name server to the first provider whose mask matches it
# (case-insensitively) and collect that server's hosts under the provider id.
# Name servers matching no mask are dropped.
my %provider_domains;
my $processed_ns_servers = 0;
my $total_ns_records = keys %ns_hash;

# Compile each mask once instead of once per name server, and walk the masks
# in a fixed (sorted) order so that overlapping masks resolve to the same
# provider on every run rather than depending on hash ordering.
# Empty masks are skipped: an empty pattern cannot identify a provider.
my @compiled_masks =
    map  { [ qr/$_/i, $providers{$_} ] }
    sort { $a cmp $b }
    grep { length($_) } keys %providers;

foreach my $ns_server (keys %ns_hash) {
    foreach my $entry (@compiled_masks) {
        my ($mask_re, $provider_name) = @$entry;
        next unless $ns_server =~ $mask_re;

        $provider_domains{$provider_name} .= $ns_hash{$ns_server};
        last;    # first matching mask wins
    }

    $processed_ns_servers++;
    print get_log_time() . "P NS: $processed_ns_servers out of $total_ns_records\n"
        if DEBUG && $processed_ns_servers % 1000 == 0;
}
use Fcntl qw(:flock SEEK_END);
use Net::Nslookup;
use Geo::IP;

# Translate a two-letter continent code into the numeric region id written to
# the output file. Unknown or missing codes yield the string 'NULL', which
# ends up as an SQL NULL in the generated row.
sub get_region_id {
    my ($code) = @_;

    my %region_id_for = (
        AF => 1,
        AN => 2,
        AS => 3,
        EU => 4,
        NA => 5,
        OC => 6,
        SA => 7,
    );

    return 'NULL' unless defined $code && exists $region_id_for{$code};
    return $region_id_for{$code};
}

# Append one row, formatted as an SQL VALUES tuple, to OUTPUT_FILE.
#
#   $domain, $zone        - text values; quoted here
#   $host_ip              - already quoted by the caller, or the string NULL
#   $host_provider        - provider id
#   $region_id            - region id or the string NULL
#
# Many children append to the same file, so the write is done under an
# exclusive lock. Dies if the file cannot be opened, locked, positioned or
# closed.
sub write_domain_file {
    my ($domain, $host_ip, $host_provider, $region_id, $zone) = @_;

    # These lines are later spliced verbatim into an INSERT statement; escape
    # backslashes and single quotes so zone-file content cannot break out of
    # the quoted literal.
    for my $text ($domain, $zone) {
        $text =~ s/([\\'])/\\$1/g;
    }

    open(my $out, '>>', OUTPUT_FILE) or die "can't open file\n";
    flock($out, LOCK_EX) or die "can't lock file $!\n";
    seek($out, 0, SEEK_END) or die "Cannot seek - $!\n";
    print {$out} "('$domain', $host_ip, $host_provider, $region_id, '$zone', UTC_TIMESTAMP, UTC_TIMESTAMP)\n";
    close($out) or die "can't close file $!\n";    # closing also drops the lock
    return;
}

# For every provider, resolve each of its domains in a separate child process
# and record the first address plus its region in OUTPUT_FILE.
foreach my $provider_id (keys(%provider_domains)) {
    # Lower-case before de-duplicating so "Example" and "example" are
    # resolved only once.
    my @provider_domains =
        uniq map { lc($_) } split('::', $provider_domains{$provider_id});

    foreach my $domain (@provider_domains) {
        my $f = fork;
        if (!defined $f) {
            # Do not lose the domain silently when no child could be created.
            warn get_log_time() . "fork failed for $domain: $!\n";
            next;
        }
        next if $f != 0;    # parent: move on to the next domain

        # ---- child process ----
        my $gi = Geo::IP->open_type(GEOIP_CITY_EDITION_REV1, GEOIP_STANDARD);

        # On SIGALRM, record the domain without address/region and stop.
        # NOTE(review): no alarm() call is visible in this script, so this
        # handler only runs if something else delivers SIGALRM - confirm
        # whether a lookup timeout was intended.
        $SIG{ALRM} = sub {
            write_domain_file($domain, 'NULL', $provider_id, 'NULL', ZONE_NAME);
            exit(0);
        };

        my @addrs     = nslookup($domain);
        my $host_ip   = 'NULL';
        my $region_id = 'NULL';

        if (@addrs) {
            my $addr = $addrs[0];
            $host_ip = "'" . $addr . "'";

            # Look up the raw address; the quoted form above is only for the
            # SQL row and is not something GeoIP can resolve.
            my $r = $gi ? $gi->record_by_addr($addr) : undef;
            if ($r) {
                $region_id = get_region_id($r->continent_code);
            }
        }

        write_domain_file($domain, $host_ip, $provider_id, $region_id, ZONE_NAME);
        exit(0);
    }
}

# Block until every child has been reaped.
1 while wait != -1;
# Finished processing the input file; load the rows written by the children
# into the `sites` table in batches of MYSQL_PER_INSERT rows.
$dbh = get_connection();
my $insert_start = 'INSERT INTO sites(domain, ip, provider_id, region_id, zone, created_at, updated_at) VALUES ';
my @insert_values;
my $insert_end = ' ON DUPLICATE KEY UPDATE ip = VALUES(ip), provider_id = VALUES(provider_id), updated_at = UTC_TIMESTAMP;';

# Send the pending rows as a single multi-row INSERT and empty the buffer.
# Does nothing when no rows are pending.
my $flush_inserts = sub {
    return unless @insert_values;

    my $statement = $insert_start . join(',', @insert_values) . $insert_end;
    $dbh->do($statement);

    print get_log_time() . "Inserted: " . scalar(@insert_values) . "\n" if DEBUG;
    @insert_values = ();
    return;
};

# The output file only exists if at least one child wrote a row; a run that
# matched no domains is not an error.
my $output_path = OUTPUT_FILE;
if (-e $output_path) {
    open(my $rows_fh, '<', $output_path) or die "Can't open file: $!";
    while (my $tmp = <$rows_fh>) {
        chomp($tmp);
        next unless length $tmp;    # an empty tuple would break the statement

        push(@insert_values, $tmp);
        $flush_inserts->() if @insert_values >= MYSQL_PER_INSERT;
    }
    close($rows_fh);

    # Whatever is left over after the last full batch.
    $flush_inserts->();
}

$dbh->disconnect();
exit;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement