Advertisement
Guest User

GEO-IP Apache Log Processor

a guest
Aug 5th, 2014
260
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 8.76 KB | None | 0 0
  #include <arpa/inet.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/socket.h>
  #include <time.h>
  5.  
  6.  
  /*
   * One address range from the GeoIP database, chained into a singly
   * linked list by read_geo_db().
   */
  struct geo_array {
     int idx;                         /* value of lineCount when the record was read */
     unsigned int ip_start, ip_end;   /* range bounds; compared against ntohl()'d addresses */
     char *country;                   /* heap copy of the third CSV field */
     struct geo_array *next;          /* following record in the list */
  };
  14.  
  /*
   * One entry of the lookup cache built by check_cache(): a /24 network
   * that has already been resolved to a country.
   */
  struct cache_array {
     unsigned int        subnet;    /* result of ip24mask() for the looked-up address */
     char               *country;   /* heap copy of the resolved country string */
     struct cache_array *next;      /* following cache entry, NULL on the newest one */
  };
  20.  
  /*
   * Per-country totals accumulated by read_log(), kept as a singly
   * linked list that sort_list() later orders by hit count.
   */
  struct geo_stat {
     char            *country;   /* heap copy of the country string */
     int              hits;      /* number of log lines attributed to this country */
     long             size;      /* running sum of the per-line size field */
     struct geo_stat *next;      /* following entry, NULL at the end of the list */
  };
  27.  
  /* GeoIP range list; main() stores the list returned by read_geo_db() in both. */
  struct geo_array *geo_first, *geo_ips;

  /*
   * State of the /24 lookup cache maintained by check_cache():
   * the traversal cursor, the first entry, a scratch "previous" pointer
   * and the most recently appended entry.
   */
  struct cache_array *cache, *cache_first, *cache_previous, *cache_last;

  int cache_count = 0;   /* stays 0 until the first cache entry has been created */
  int lineCount = 0;     /* bumped by read_geo_db() for every database line it reads */
  36.  
  37. struct geo_array *read_geo_db(char *filename) { /* This reads the geo_ip database */
  38.    FILE *fHandle;
  39.    char fLine[4096];
  40.    char *record;
  41.    struct geo_array *geo_list;
  42.    struct geo_array *geo_first;
  43.  
  44.    geo_list = geo_first = malloc(sizeof(struct geo_array));
  45.  
  46.    if(!(fHandle = fopen(filename, "r"))) {
  47.       printf("Could not open file %s, dying\n", filename);
  48.       exit(-1);
  49.    }
  50.  
  51.    while(fgets(fLine, 4096, fHandle)) {
  52.      lineCount++;
  53.      record = strtok(fLine, ",");
  54.      geo_list->idx = lineCount;
  55.      geo_list->ip_start = atol(record);
  56.      record = strtok(NULL, ",");
  57.      geo_list->ip_end = atol(record);
  58.      record = strtok(NULL, ",");
  59.      geo_list->country = strdup(record);
  60.      geo_list->country[strlen(record)-1]=0;
  61.      geo_list->next = malloc(sizeof(struct geo_array));
  62.      geo_list->next->next = NULL;
  63.      geo_list = geo_list->next;
  64.    }
  65.    geo_list = geo_first;
  66. }
  67.  
  68. char *check_country(char *IP, struct geo_array *geo) { /* Find the IP in the geo database, return NULL if not found */
  69.    unsigned int IP_int = ntohl(inet_addr(IP));
  70.    while(geo->next) {
  71.      if((IP_int > geo->ip_start) && (IP_int < geo->ip_end)) {
  72.        return(geo->country);
  73.      }
  74.      geo = geo->next;
  75.    }
  76.    return(NULL);
  77. }
  78.  
  /*
   * Reduce a host-byte-order IPv4 address to the network address of its
   * /24 by clearing the low eight bits.
   *
   * Uses an unsigned constant: left-shifting -1 is undefined behaviour and
   * no runtime address parsing is needed to build the mask.
   */
  unsigned int ip24mask(unsigned int ip_int) {
    return ip_int & 0xFFFFFF00u;
  }
  86.  
  87. char *check_cache(char *IP, struct geo_array *geo) { /* Build a cache of already identified /24s */
  88.   char *country;
  89.   unsigned int IP_int = ntohl(inet_addr(IP)); /* We need the IP address as an int in host byte order */
  90.   unsigned int IP_subnet = ip24mask(IP_int); /* Do this now so we don't have to do it over and over again */
  91.  
  92.   if(!cache_count) { /* We don't have a cache */
  93.     cache_previous = cache_first = cache = malloc(sizeof(struct cache_array)); /* Allocate for our first cache entry */
  94.     cache->next = NULL; /* There is no next entry yet */
  95.     if(!(country = check_country(IP, geo)))
  96.       country = strdup("UNKNOWN"); /* Couldn't find a country so we need to give it one */
  97.     cache->subnet = IP_subnet; /* Save only the network address of the /24 */
  98.     cache->country = strdup(country); /* Copy the country address into the array */
  99.     cache_last = cache; /* Save the last entry in the cache */
  100.     cache_count = 1; /* We have a cache, set this */
  101.     return(cache->country); /* Return the cache country */
  102.   }
  103.   else {
  104.     while(cache->next) { /* Loop the cache looking for a match */
  105.      if(cache->subnet == IP_subnet) {
  106.        return(cache->country); /* We got a match, bail returning the country */
  107.      }
  108.      cache = cache->next; /* No match yet, next entry in the cache */
  109.     }
  110.     /* If we get here, there is no cache entry */
  111.     cache = cache_last; /* Got to the last entry in the cache */
  112.     cache->next = malloc(sizeof(struct cache_array)); /* Allocate some memory */
  113.     cache = cache->next;
  114.     cache->next = NULL; /* Since we're on the last entry, the next entry doesn't exist yet */
  115.     cache_last = cache; /* Record this as being our last entry */
  116.     if(!(country = check_country(IP, geo)))
  117.       country = strdup("UNKNOWN");  /* We couldn't find a match */
  118.     cache->subnet = IP_subnet;
  119.     cache->country = strdup(country);
  120.     return(cache->country);
  121.   }
  122. };
  123.  
  124.  
  125. struct geo_stat *list_switch(struct geo_stat *l1, struct geo_stat *l2) {
  126.    l1->next = l2->next;
  127.    l2->next = l1;
  128.    return(l2);
  129. }
  130. struct geo_stat *sort_list(struct geo_stat *stats) {
  131.   struct geo_stat *top, *p, *q; /* Top of the array, a pointer to position - 1, and a pointer to position */
  132.   int changed = 1;
  133.  
  134.   top = malloc(sizeof(struct geo_stat)); /* We need an extra entry at the top */
  135.  
  136.   top->next = stats; /* Start at the beginning of the stats array */
  137.   if(stats && stats->next) {
  138.     while(changed) {
  139.       changed = 0;
  140.       q = top;
  141.       p = top->next;
  142.       while(p->next) {
  143.        if(p->hits < p->next->hits) {
  144.          q->next = list_switch(p, p->next);
  145.          changed = 1;
  146.        }
  147.        q = p;
  148.        if(p->next)
  149.         p = p->next;
  150.      }
  151.    }
  152.   }
  153.   p = top->next;
  154.   free(top);
  155.   return(p);
  156. };
  157.  
  158.  
  159. void *read_log(struct geo_array *geo, char *logfile) {
  160.   FILE *fHandle;
  161.   char logline[4096];
  162.   char *string_break1;
  163.   char *string_break2;
  164.   char *string_break3;
  165.   char *country;
  166.   struct geo_stat *stats = malloc(sizeof(struct geo_stat));
  167.   struct geo_stat *stats_first = stats;
  168.   struct geo_stat *stats_previous;
  169.   int stat_count = 0;
  170.   int found_hit = 0;
  171.   int linecount = 0;
  172.   time_t start;
  173.   time_t end;
  174.   double seconds;
  175.   char *find_codes_size, *orig_str;
  176.   int count = 0;
  177.   char *record, *record2;
  178.   long total_hits, total_size;
  179.  
  180.   total_hits = total_size = 0;
  181.  
  182.   fHandle=fopen(logfile, "r");
  183.   time(&start);
  184.   while(fgets(logline, 4096, fHandle)) {
  185. /*    if(!(linecount)) {
  186.        orig_str = find_codes_size = strdup(logline); /
  187.        record = strtok(find_codes_size, "[");
  188.        record = strtok(NULL, "[");
  189.        record2 = strstr(record, "]");
  190.        record2[0] = '\0';
  191.        printf("Processing logfile %s\nFirst entry at %s\n",logfile,record);
  192.        free(orig_str);
  193.      } */
  194. //    linecount++;
  195.     string_break1 = strstr(logline, " "); /* Next coupla lines break up the log entry */
  196.     string_break1[0] = 0;
  197.     string_break1++;
  198.     string_break2 = strstr(string_break1, " ");
  199.     string_break2[0] = 0;
  200.     string_break2++;
  201.     orig_str = find_codes_size = strdup(string_break2); /* Duplicate the string, we are gonna be screwing with this */
  202.     record = strtok(find_codes_size, " ");
  203.     for(count = 0; count < 8; count++)
  204.       record=strtok(NULL, " ");
  205.     cache = cache_previous = cache_first; /* Reset our cache positioning */
  206.     country = check_cache(string_break1, geo); /* Check the cache to see if we have an entry */
  207.     if(!country)
  208.       country = strdup("UNKNOWN"); /* We didn't have an entry and the IP isn't recognized */
  209.     stats = stats_first; /* Go to the first entry in our stats */
  210.     if(stat_count == 0) { /* There IS no stat array yet, make one */
  211.       stats->country = strdup(country);
  212.       stats->hits = 1;
  213.       stats->size = atol(record);
  214.       stats->next = NULL;
  215.       stat_count++;
  216.     }
  217.     else {
  218.       stat_count = 0;
  219.       while(stats) {
  220.         stat_count++;
  221.         if(!(strcmp(stats->country, country))) {
  222.           stats->hits++;
  223.           stats->size += atol(record);
  224.           found_hit = 1;
  225.           break;
  226.         }
  227.         stats_previous = stats;
  228.         stats = stats->next;
  229.       }
  230.       stats = stats_previous;
  231.       if(found_hit == 0) {
  232.        stat_count++;
  233.        stats->next = malloc(sizeof(struct geo_stat));
  234.        stats = stats->next;
  235.        stats->next = NULL;
  236.        stats->country = strdup(country);
  237.        stats->hits = 1;
  238.        stats->size = atol(record);
  239.      }
  240.      stats = stats->next;
  241.      found_hit = 0;
  242.    }
  243.    free(orig_str);
  244.   }
  245.   stats = stats_first;
  246.   printf("\n");
  247.   stats = sort_list(stats_first);
  248.   stats_first = stats; /* Get the first entry */
  249.   while(stats) {
  250.     total_hits+=stats->hits;
  251.     total_size+=stats->size;
  252.     stats = stats->next;
  253.   } /* Get the totals from the combined array, faster than calculating as we go */
  254.   linecount = total_hits;
  255.   stats = stats_first;
  256.   printf("Displaying list of countries that have more than 5%% of the total hit count\n");
  257.   while(stats) {
  258.     if((((float)stats->hits/(float)linecount)*100) > 5) /* Only print countries that have > 10 % of the hits */
  259.       printf("Country: %s Hits: %d [%.2f%% of total] [%.2fmeg]\n",
  260.               stats->country, stats->hits,
  261.               (((float)stats->hits/(float)linecount)*(float)100), ((float)stats->size/1000000));
  262.     stats = stats->next;
  263.   }
  264.   time(&end);
  265.   seconds = difftime(end, start);
  266.   printf("Processed %lu records in %.f seconds\n",total_hits, seconds);
  267.   printf("Total recorded data transfer: %6.3f gigs\n", (float)total_size/(float)1000000000);
  268. }
  269.  
  270. int main(int argc, char **argv) {
  271.  
  272.   geo_first = geo_ips = read_geo_db("geo_new.csv");
  273.   read_log(geo_first, argv[1]);
  274. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement