daily pastebin goal
11%
SHARE
TWEET

parse

a guest Oct 23rd, 2017 88 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  #!/usr/bin/env perl
  use strict;
  use Data::Dumper;
  use DBI;
  use warnings;
  use Proc::Queue;
  use POSIX ":sys_wait_h";
  use List::MoreUtils qw(uniq);

  # Every constant below is built from @ARGV at compile time, so the argument
  # count has to be checked in a BEGIN block placed before the `use constant`
  # lines; otherwise missing arguments silently become undefined constants.
  BEGIN {
      if (@ARGV < 7) {
          die "Usage: $0 TMP_DIR SOURCE_FILE MYSQL_HOST MYSQL_DB "
            . "MYSQL_USER MYSQL_PASSWORD ZONE_NAME\n";
      }
  }

  # constants which will be passed through cli
  use constant TMP_DIR => $ARGV[0];                           # working directory
  use constant SOURCE_FILE_PATH => TMP_DIR . '/' . $ARGV[1];  # input file inside TMP_DIR
  use constant MYSQL_HOST => $ARGV[2];
  use constant MYSQL_DB => $ARGV[3];
  use constant MYSQL_USER => $ARGV[4];
  use constant MYSQL_PASSWORD => $ARGV[5];
  use constant ZONE_NAME => $ARGV[6];                         # appended to relative host names

  # Intermediate file: child processes append one SQL value tuple per domain,
  # and the insert phase reads it back.
  use constant OUTPUT_FILE => TMP_DIR . '/' . ZONE_NAME . '_out.txt';
  use constant MYSQL_PER_INSERT => 250;       # value tuples per INSERT statement
  use constant THREADS_AMOUNT => 16;          # not referenced anywhere in this script
  use constant MAX_PROCESSES_AMOUNT => 30;    # handed to Proc::Queue::size below
  use constant DEBUG => 1;                    # true enables the progress messages

  Proc::Queue::size(MAX_PROCESSES_AMOUNT);

  Proc::Queue::trace(0); # 0 = tracing switched off
  Proc::Queue::debug(0); # 0 = debug output switched off

  # Return the prefix put in front of every log message:
  # "[YYYY-MM-DD HH:MM:SS][<ZONE_NAME>] ", using the current local time.
  sub get_log_time {
      # localtime yields (sec, min, hour, mday, mon, year, ...); pick the six
      # fields needed, already in printing order.
      my ($year, $month, $day, $hour, $minute, $second) = (localtime(time))[5, 4, 3, 2, 1, 0];

      my $stamp = sprintf(
          "%04d-%02d-%02d %02d:%02d:%02d",
          $year + 1900,    # localtime counts years from 1900
          $month + 1,      # and months from 0
          $day, $hour, $minute, $second,
      );

      return sprintf('[%s][%s] ', $stamp, ZONE_NAME);
  }

  # Provider lookup table, filled from the `providers` table further down:
  # pattern derived from a provider mask => provider id.
  my %providers;

  # Open and return a new DBI handle to the MySQL database named by the
  # MYSQL_* constants. RaiseError is enabled, so DBI failures die instead of
  # returning false.
  sub get_connection {
      my $dsn = sprintf('DBI:mysql:database=%s;host=%s', MYSQL_DB, MYSQL_HOST);
      my %attributes = ('RaiseError' => 1);

      return DBI->connect($dsn, MYSQL_USER, MYSQL_PASSWORD, \%attributes);
  }

  # Load every provider mask from the database and turn it into a regex source
  # string stored as a key of %providers (value: provider id).
  my $dbh = get_connection();

  my $sth = $dbh->prepare("SELECT id, mask FROM providers");

  $sth->execute();

  while (my $row = $sth->fetchrow_hashref()) {
      my $mask = $row->{'mask'};

      # A NULL or empty mask would produce an empty pattern, which is useless
      # for matching a name server; ignore such rows.
      next unless defined $mask && length $mask;

      # Treat the mask as a glob: each '*' becomes a lazy wildcard group and
      # everything else is matched literally (dots in host names included).
      # The -1 limit keeps a trailing '*' from being dropped by split.
      my $pattern = join '(.*?)', map { quotemeta($_) } split /\*/, $mask, -1;

      $providers{$pattern} = $row->{'id'};
  }

  $sth->finish();

  # The connection is closed before any child process is forked; a fresh one
  # is opened for the insert phase.
  $dbh->disconnect();
  my $total_providers = keys %providers;
  print get_log_time() . "Total providers found: $total_providers\n" if DEBUG;

  # Read the zone file and collect, per name server, the hosts delegated to it.
  # %ns_hash maps a name server to a string of host names, each followed by '::'.
  #
  # Three-argument open with a lexical handle: the path comes from the command
  # line and must never be interpreted as an open mode.
  open(my $zone_fh, '<', SOURCE_FILE_PATH) or die "Can't open file: $!";
  my $ns_entries = 0;

  my %ns_hash;

  my $read_lines;    # only used by the commented-out debugging limit below
  while (my $line = <$zone_fh>) {
      chomp($line);

      my $host;
      my $ns_server;

      if ($line =~ /^(\S*)\.\s+\d+\s+\S+\s+NS\s+(\S*)\./) {
          # Fully qualified owner name followed by TTL and class.
          ($host, $ns_server) = ($1, $2);
      }
      elsif ($line =~ /^(\S*)\s+NS\s+(\S*)\./) {
          # Short form: owner name relative to the zone.
          ($host, $ns_server) = ($1 . '.' . ZONE_NAME, $2);
      }
      else {
          next;
      }

      $ns_entries++;
      $ns_hash{$ns_server} .= $host . "::";
  #    last if (++$read_lines == 1000000) and DEBUG;
  }

  print get_log_time() . "Total NS records in file: $ns_entries\n" if DEBUG;
  close($zone_fh);

  # Assign every name server to the first provider whose pattern matches it and
  # accumulate that server's hosts under the provider id.
  # %provider_domains maps a provider id to a string of '::'-terminated hosts.
  my %provider_domains;
  my $processed_ns_servers = 0;

  my $total_ns_records = keys %ns_hash;

  # Compile each pattern once instead of once per name server, and walk the
  # patterns in sorted order so that the winner among several matching
  # patterns does not depend on hash ordering.
  my @compiled_masks;
  foreach my $pattern (sort keys %providers) {
      push @compiled_masks, [ qr/$pattern/i, $providers{$pattern} ];
  }

  foreach my $ns_server (keys %ns_hash) {
      foreach my $entry (@compiled_masks) {
          my ($regex, $provider_id) = @{$entry};
          next unless $ns_server =~ $regex;

          $provider_domains{$provider_id} .= $ns_hash{$ns_server};
          last;    # first matching provider wins
      }
      $processed_ns_servers++;
      print get_log_time() . "P NS: $processed_ns_servers out of $total_ns_records\n" if $processed_ns_servers % 1000 == 0 && DEBUG;
  }

  use Fcntl qw(:flock SEEK_END);
  use Net::Nslookup;
  use Geo::IP;

  # Map a two-letter continent code to the numeric region id.
  # Returns the string 'NULL' (ready to be written into an SQL tuple) for an
  # undefined or unknown code.
  sub get_region_id {
      my ($code) = @_;

      my %region_id_for = (
          AF => 1,
          AN => 2,
          AS => 3,
          EU => 4,
          NA => 5,
          OC => 6,
          SA => 7,
      );

      return 'NULL' unless defined $code && exists $region_id_for{$code};
      return $region_id_for{$code};
  }

  # Append one SQL value tuple for a domain to OUTPUT_FILE.
  #   $domain, $zone              - plain text; quoted and escaped here
  #   $host_ip                    - already an SQL literal ('1.2.3.4' or NULL)
  #   $host_provider, $region_id  - numeric id or the string NULL
  # The file is locked exclusively while writing because many child processes
  # append to it at the same time. Dies if the file cannot be opened, locked,
  # positioned or closed.
  sub write_domain_file {
      my ($domain, $host_ip, $host_provider, $region_id, $zone) = @_;

      # Both values come from the zone file / command line and end up inside
      # single quotes in an SQL statement: escape backslashes and quotes.
      for my $text ($domain, $zone) {
          $text =~ s/([\\'])/\\$1/g;
      }

      open(my $out, '>>', OUTPUT_FILE) or die "can't open file\n";
      flock($out, LOCK_EX) or die "can't lock file $!\n";
      seek($out, 0, SEEK_END) or die "Cannot seek - $!\n";
      print {$out} "('$domain', $host_ip, $host_provider, $region_id, '$zone', UTC_TIMESTAMP, UTC_TIMESTAMP)\n";
      # Buffered write errors only surface at close; closing also drops the lock.
      close($out) or die "can't close file $!\n";
  }

  # Resolve every domain of every provider in its own child process.
  foreach my $provider_id (keys(%provider_domains)) {
      # Lower-case before de-duplicating so that names differing only in case
      # are looked up once.
      my @domains = uniq map { lc($_) } split('::', $provider_domains{$provider_id});

      foreach my $domain (@domains) {
          my $pid = fork;

          if (!defined $pid) {
              # Do not lose the domain silently when no process could be created.
              warn get_log_time() . "fork failed for $domain: $!\n";
              next;
          }
          next if $pid != 0;    # parent: go on with the next domain

          # ---- child process from here on ----
          my $gi = Geo::IP->open_type(GEOIP_CITY_EDITION_REV1, GEOIP_STANDARD);

          # Records the domain without address/region if SIGALRM arrives.
          # NOTE(review): nothing in this script calls alarm(), so this only
          # fires if something else delivers SIGALRM - confirm the intended
          # lookup timeout.
          $SIG{ALRM} = sub {
              write_domain_file($domain, 'NULL', $provider_id, 'NULL', ZONE_NAME);
              exit(0);
          };

          my @addrs = nslookup($domain);

          my $host_ip   = 'NULL';
          my $region_id = 'NULL';

          if (@addrs) {
              $host_ip = "'" . $addrs[0] . "'";

              # The GeoIP lookup needs the bare address, not the SQL-quoted
              # literal built above.
              my $record = $gi->record_by_addr($addrs[0]);
              $region_id = get_region_id($record->continent_code) if $record;
          }

          write_domain_file($domain, $host_ip, $provider_id, $region_id, ZONE_NAME);
          exit(0);
      }
  }

  # Block until every child has been reaped.
  1 while wait != - 1;
  # finished processing input file

  # starting inserting data into db
  $dbh = get_connection();

  my $insert_start = 'INSERT INTO sites(domain, ip, provider_id, region_id, zone, created_at, updated_at) VALUES ';
  # NOTE(review): region_id is not refreshed on duplicate keys although ip is -
  # confirm whether that is intended.
  my $insert_end = ' ON DUPLICATE KEY UPDATE ip = VALUES(ip), provider_id = VALUES(provider_id), updated_at = UTC_TIMESTAMP;';

  # Execute one multi-row INSERT built from the given value tuples and log how
  # many tuples were sent. Does nothing for an empty batch. Database errors
  # die because the handle is created with RaiseError.
  sub insert_site_rows {
      my ($db, $prefix, $suffix, @rows) = @_;

      return unless @rows;

      $db->do($prefix . join(',', @rows) . $suffix);
      print get_log_time() . "Inserted: " . scalar(@rows) . "\n" if DEBUG;
      return;
  }

  # Three-argument open with a lexical handle; the path is derived from
  # command-line arguments.
  open(my $rows_fh, '<', OUTPUT_FILE) or die "Can't open file: $!";

  my @insert_values;

  while (my $row = <$rows_fh>) {
      chomp($row);

      # An empty line would put an empty element into the VALUES list.
      next unless length $row;

      push(@insert_values, $row);

      # Send a statement as soon as a full batch has been collected.
      if (@insert_values == MYSQL_PER_INSERT) {
          insert_site_rows($dbh, $insert_start, $insert_end, @insert_values);
          @insert_values = ();
      }
  }
  close($rows_fh);

  # Whatever is left over forms the final, smaller batch.
  insert_site_rows($dbh, $insert_start, $insert_end, @insert_values);

  $dbh->disconnect();

  exit;
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top