Advertisement
Guest User

parse

a guest
Oct 23rd, 2017
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.30 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. use strict;
  3. use Data::Dumper;
  4. use DBI;
  5. use warnings;
  6. use Proc::Queue;
  7. use POSIX ":sys_wait_h";
  8. use List::MoreUtils qw(uniq);
  9.  
  # Command-line configuration.
  #
  # Usage:
  #   <script> TMP_DIR SOURCE_FILE MYSQL_HOST MYSQL_DB MYSQL_USER MYSQL_PASSWORD ZONE_NAME
  #
  # `use constant` is evaluated at compile time, so the argument count has to
  # be checked in a BEGIN block placed before the first constant.  Without the
  # check a missing argument silently becomes an undef constant and the script
  # only fails much later (or builds paths such as "/_out.txt").
  # $^C is true under `perl -c`, where no arguments are expected.
  BEGIN {
      if (!$^C && @ARGV < 7) {
          die "Usage: $0 TMP_DIR SOURCE_FILE MYSQL_HOST MYSQL_DB MYSQL_USER "
              . "MYSQL_PASSWORD ZONE_NAME\n";
      }
  }

  # constants which will be passed through cli
  use constant TMP_DIR          => $ARGV[0];
  use constant SOURCE_FILE_PATH => TMP_DIR . '/' . $ARGV[1];    # zone file, relative to TMP_DIR
  use constant MYSQL_HOST       => $ARGV[2];
  use constant MYSQL_DB         => $ARGV[3];
  use constant MYSQL_USER       => $ARGV[4];
  use constant MYSQL_PASSWORD   => $ARGV[5];
  use constant ZONE_NAME        => $ARGV[6];

  # Intermediate file: one SQL value tuple per line, appended by the worker
  # processes and read back for the bulk insert.
  use constant OUTPUT_FILE          => TMP_DIR . '/' . ZONE_NAME . '_out.txt';
  use constant MYSQL_PER_INSERT     => 250;    # rows per multi-row INSERT statement
  use constant THREADS_AMOUNT       => 16;     # NOTE(review): not referenced in the visible code
  use constant MAX_PROCESSES_AMOUNT => 30;     # limit handed to Proc::Queue::size below
  use constant DEBUG                => 1;      # enables the progress messages on STDOUT

  # Limit handed to Proc::Queue for the worker processes forked later on.
  Proc::Queue::size(MAX_PROCESSES_AMOUNT);

  # Both diagnostics are passed 0 here.
  Proc::Queue::trace(0);
  Proc::Queue::debug(0);
  29.  
  # Build the prefix put in front of every log message:
  #   "[YYYY-MM-DD HH:MM:SS][<ZONE_NAME>] "
  # The timestamp is the current local time.
  sub get_log_time {
      my @now = localtime(time);

      my $stamp = sprintf(
          "%04d-%02d-%02d %02d:%02d:%02d",
          $now[5] + 1900,        # localtime counts years from 1900
          $now[4] + 1,           # ... and months from 0
          @now[ 3, 2, 1, 0 ],    # day, hour, minute, second
      );

      return '[' . $stamp . '][' . ZONE_NAME . '] ';
  }
  36.  
  # Provider lookup table: key is the regex source derived from a provider's
  # mask, value is the provider id.  It is filled from the `providers` table.
  my %providers;

  # Open a new MySQL connection using the host, database and credentials given
  # on the command line.  RaiseError is switched on, so DBI reports failures by
  # dying instead of returning error codes.
  # Returns the database handle.
  sub get_connection {
      my $dsn = join '', "DBI:mysql:database=", MYSQL_DB, ";host=", MYSQL_HOST;
      my %attributes = ('RaiseError' => 1);

      return DBI->connect($dsn, MYSQL_USER, MYSQL_PASSWORD, \%attributes);
  }
  44.  
  # Load all provider masks from the database and store them in %providers as
  # regex source strings mapped to the provider id.
  my $dbh = get_connection;

  my $sth = $dbh->prepare("SELECT id, mask FROM providers");
  $sth->execute();

  while (my $row = $sth->fetchrow_hashref()) {
      # A mask is treated as a literal host name in which '*' is the only
      # wildcard.  Every literal piece is escaped with quotemeta so that
      # characters such as '.' match themselves instead of acting as regex
      # operators; the pieces are then re-joined with the wildcard group.
      # The -1 limit keeps a trailing empty piece, so a mask ending in '*'
      # still ends in a wildcard.
      # NOTE(review): this assumes masks are glob-style, not hand-written
      # regular expressions - confirm against the contents of `providers`.
      my $pattern = join '(.*?)',
          map { quotemeta($_) } split(/\*/, $row->{'mask'}, -1);

      $providers{$pattern} = $row->{'id'};
  }

  $sth->finish();
  $dbh->disconnect();

  my $total_providers = keys %providers;
  print get_log_time() . "Total providers found: $total_providers\n" if DEBUG;
  63.  
  # Read the zone file and group the delegated names by name server:
  # $ns_hash{<ns host>} is a string of owner names, each followed by "::".
  #
  # Three-argument open with a lexical handle: the path is built from
  # command-line arguments, so it must never be interpreted as an open mode.
  open(my $zone_fh, '<', SOURCE_FILE_PATH) or die "Can't open file: $!";

  my $ns_entries = 0;
  my %ns_hash;

  while (my $line = <$zone_fh>) {
      chomp $line;

      my ($host, $ns_server);

      if ($line =~ /^(\S*)\.\s+\d+\s+\S+\s+NS\s+(\S*)\./) {
          # "<owner>. <number> <token> NS <target>." - the owner is already
          # dot-terminated, so it is used as written (without the final dot).
          ($host, $ns_server) = ($1, $2);
      }
      elsif ($line =~ /^(\S*)\s+NS\s+(\S*)\./) {
          # "<owner> NS <target>." - owner without a trailing dot; the zone
          # name is appended to it.
          ($host, $ns_server) = ($1 . '.' . ZONE_NAME, $2);
      }
      else {
          next;    # not an NS record in either supported layout
      }

      $ns_entries++;
      $ns_hash{$ns_server} .= $host . "::";
  }

  print get_log_time() . "Total NS records in file: $ns_entries\n" if DEBUG;
  close($zone_fh);
  90.  
  # Assign every name server to the first provider whose mask matches it and
  # collect the domains served by it: $provider_domains{<provider id>} is a
  # string of domain names, each followed by "::".
  my %provider_domains;
  my $processed_ns_servers = 0;

  my $total_ns_records = keys %ns_hash;

  # Compile each mask exactly once.  Interpolating the mask string into a
  # match inside the nested loop would recompile the regex for every
  # (name server, provider) pair.
  my @provider_matchers = map { [ qr/$_/i, $providers{$_} ] } keys %providers;

  foreach my $ns_server (keys %ns_hash) {
      foreach my $matcher (@provider_matchers) {
          my ($mask_re, $provider_name) = @{$matcher};
          next unless $ns_server =~ $mask_re;

          # First match wins; the remaining providers are not tried.
          $provider_domains{$provider_name} .= $ns_hash{$ns_server};
          last;
      }

      $processed_ns_servers++;
      print get_log_time() . "P NS: $processed_ns_servers out of $total_ns_records\n"
          if $processed_ns_servers % 1000 == 0 && DEBUG;
  }
  107.  
  use Fcntl qw(:flock SEEK_END);
  use Net::Nslookup;
  use Geo::IP;

  # Translate a two-letter continent code into the numeric region id.
  # Returns the string 'NULL' (an SQL literal) for an unknown or undefined code.
  sub get_region_id {
      my ($code) = @_;

      my %region_id_for = (
          AF => 1,
          AN => 2,
          AS => 3,
          EU => 4,
          NA => 5,
          OC => 6,
          SA => 7,
      );

      return 'NULL' unless defined $code && exists $region_id_for{$code};
      return $region_id_for{$code};
  }

  # Append one SQL value tuple for a domain to OUTPUT_FILE.
  #   $domain, $zone  - written inside single quotes
  #   $host_ip, $host_provider, $region_id - written verbatim, so the caller
  #                     passes either an already quoted value or 'NULL'
  # The file is locked exclusively while writing because many worker
  # processes append to it at the same time.  Dies if the file cannot be
  # opened, locked, positioned or closed.
  # NOTE(review): $domain and $zone are not escaped; a name containing a
  # single quote would break the INSERT statement assembled from this file.
  sub write_domain_file {
      my ($domain, $host_ip, $host_provider, $region_id, $zone) = @_;

      open(my $out_fh, '>>', OUTPUT_FILE) or die "can't open file\n";
      flock($out_fh, LOCK_EX) or die "can't lock file $!\n";
      seek($out_fh, 0, SEEK_END) or die "Cannot seek - $!\n";
      print {$out_fh} "('$domain', $host_ip, $host_provider, $region_id, '$zone', UTC_TIMESTAMP, UTC_TIMESTAMP)\n";
      close($out_fh) or die "can't close file $!\n";
      return;
  }

  # Resolve and geo-locate every domain of every matched provider, one forked
  # worker process per domain.
  # NOTE(review): OUTPUT_FILE is only ever appended to in the visible code; if
  # a file from an earlier run is still present its rows are read back as well.
  foreach my $provider_id (keys(%provider_domains)) {
      # Lower-case before de-duplicating, otherwise names that differ only in
      # case would each get their own worker and their own row.
      my @provider_domains =
          uniq map { lc($_) } split('::', $provider_domains{$provider_id});

      foreach my $domain (@provider_domains) {
          my $pid = fork;

          if (!defined $pid) {
              # Do not drop the domain silently when no process could be created.
              warn get_log_time() . "fork failed for $domain: $!\n";
              next;
          }
          next if $pid != 0;    # parent: move on to the next domain

          # ---- child process from here on ----

          # On SIGALRM record the domain without address/region and stop.
          # NOTE(review): this script never calls alarm() itself, so the
          # handler only runs if something else delivers SIGALRM - confirm.
          $SIG{ALRM} = sub {
              write_domain_file($domain, 'NULL', $provider_id, 'NULL', ZONE_NAME);
              exit(0);
          };

          my @addrs = nslookup($domain);

          my $host_ip   = 'NULL';
          my $region_id = 'NULL';

          if (@addrs && defined $addrs[0]) {
              # The SQL-quoted form is only for the output file; GeoIP must be
              # queried with the bare address or the lookup cannot succeed.
              $host_ip = "'" . $addrs[0] . "'";

              my $gi = Geo::IP->open_type(GEOIP_CITY_EDITION_REV1, GEOIP_STANDARD);
              my $record = $gi ? $gi->record_by_addr($addrs[0]) : undef;

              $region_id = get_region_id($record->continent_code) if $record;
          }

          write_domain_file($domain, $host_ip, $provider_id, $region_id, ZONE_NAME);
          exit(0);
      }
  }

  # Block until every worker process has exited.
  1 while wait != -1;
  # finished processing input file
  175.  
  # starting inserting data into db
  #
  # Every line of OUTPUT_FILE is one SQL value tuple.  The tuples are sent in
  # multi-row INSERT statements of at most MYSQL_PER_INSERT rows each.
  # NOTE(review): the statement is assembled from the file's text, so it is
  # only as safe as the values written into that file.
  $dbh = get_connection;

  my $insert_start = 'INSERT INTO sites(domain, ip, provider_id, region_id, zone, created_at, updated_at) VALUES ';
  my $insert_end = ' ON DUPLICATE KEY UPDATE ip = VALUES(ip), provider_id = VALUES(provider_id), updated_at = UTC_TIMESTAMP;';
  my @insert_values;

  # Send the buffered tuples as a single INSERT and empty the buffer.
  # Does nothing when the buffer is empty.
  my $flush_insert_values = sub {
      return unless @insert_values;

      my $batch_sth = $dbh->prepare($insert_start . join(',', @insert_values) . $insert_end);
      $batch_sth->execute();
      $batch_sth->finish();

      print get_log_time() . "Inserted: " . scalar(@insert_values) . "\n" if DEBUG;
      @insert_values = ();
      return;
  };

  if (-e OUTPUT_FILE) {
      open(my $rows_fh, '<', OUTPUT_FILE) or die "Can't open file: $!";

      while (my $row = <$rows_fh>) {
          chomp $row;
          push @insert_values, $row;

          $flush_insert_values->() if @insert_values >= MYSQL_PER_INSERT;
      }
      close($rows_fh);

      # Whatever is left over after the last full batch.
      $flush_insert_values->();
  }
  else {
      # No worker wrote a row (for example no name server matched a provider),
      # so there is nothing to insert; that is not an error.
      print get_log_time() . "No rows to insert, " . OUTPUT_FILE . " does not exist\n" if DEBUG;
  }

  $dbh->disconnect();

  exit;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement