Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#define _POSIX_C_SOURCE 200809L /* exposes strdup() when compiling with strict -std=c99/c11 */

#include <arpa/inet.h> /* inet_addr(), ntohl() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/*
 * One record of the geo-IP database, stored as a node of a singly
 * linked list.
 */
struct geo_array {
    int idx;                /* number of the database line this record came from */
    unsigned int ip_start;  /* lower bound of the address range, as a number */
    unsigned int ip_end;    /* upper bound of the address range, as a number */
    char *country;          /* country string read from the database */
    struct geo_array *next; /* following record in the list */
};
/*
 * One entry of the lookup cache: a /24 network address and the country
 * that was resolved for it. Entries form a singly linked list.
 */
struct cache_array {
    unsigned int subnet;      /* network address of the /24 (low 8 bits cleared) */
    char *country;            /* country string cached for that network */
    struct cache_array *next; /* following cache entry */
};
/*
 * Per-country totals accumulated while reading the log, kept as a
 * singly linked list with one node per country.
 */
struct geo_stat {
    char *country;         /* country string this node counts */
    int hits;              /* number of log lines attributed to the country */
    long size;             /* sum of the size field of those log lines */
    struct geo_stat *next; /* following country */
};
/* Geo database list: both pointers receive the list returned by read_geo_db() in main(). */
struct geo_array *geo_first;
struct geo_array *geo_ips;

/* State of the /24 lookup cache shared by check_cache() and read_log(). */
struct cache_array *cache;          /* cursor used while walking the cache */
struct cache_array *cache_first;    /* first entry of the cache list */
struct cache_array *cache_previous; /* reset together with the cursor before each lookup */
struct cache_array *cache_last;     /* last entry of the cache list, where new entries are linked */

int cache_count = 0; /* becomes 1 once the cache holds its first entry */
int lineCount = 0;   /* number of geo database lines read so far */
- struct geo_array *read_geo_db(char *filename) { /* This reads the geo_ip database */
- FILE *fHandle;
- char fLine[4096];
- char *record;
- struct geo_array *geo_list;
- struct geo_array *geo_first;
- geo_list = geo_first = malloc(sizeof(struct geo_array));
- if(!(fHandle = fopen(filename, "r"))) {
- printf("Could not open file %s, dying\n", filename);
- exit(-1);
- }
- while(fgets(fLine, 4096, fHandle)) {
- lineCount++;
- record = strtok(fLine, ",");
- geo_list->idx = lineCount;
- geo_list->ip_start = atol(record);
- record = strtok(NULL, ",");
- geo_list->ip_end = atol(record);
- record = strtok(NULL, ",");
- geo_list->country = strdup(record);
- geo_list->country[strlen(record)-1]=0;
- geo_list->next = malloc(sizeof(struct geo_array));
- geo_list->next->next = NULL;
- geo_list = geo_list->next;
- }
- geo_list = geo_first;
- }
- char *check_country(char *IP, struct geo_array *geo) { /* Find the IP in the geo database, return NULL if not found */
- unsigned int IP_int = ntohl(inet_addr(IP));
- while(geo->next) {
- if((IP_int > geo->ip_start) && (IP_int < geo->ip_end)) {
- return(geo->country);
- }
- geo = geo->next;
- }
- return(NULL);
- }
/*
 * Reduce an IPv4 address (as a host-order number) to the network address
 * of its /24 by clearing the low 8 bits.
 *
 * ip_int: address value.
 *
 * Returns ip_int with the host part of the /24 set to zero.
 */
unsigned int ip24mask(unsigned int ip_int) {
    /* An unsigned constant avoids left-shifting a negative value, which is undefined. */
    return ip_int & 0xFFFFFF00u;
}
- char *check_cache(char *IP, struct geo_array *geo) { /* Build a cache of already identified /24s */
- char *country;
- unsigned int IP_int = ntohl(inet_addr(IP)); /* We need the IP address as an int in host byte order */
- unsigned int IP_subnet = ip24mask(IP_int); /* Do this now so we don't have to do it over and over again */
- if(!cache_count) { /* We don't have a cache */
- cache_previous = cache_first = cache = malloc(sizeof(struct cache_array)); /* Allocate for our first cache entry */
- cache->next = NULL; /* There is no next entry yet */
- if(!(country = check_country(IP, geo)))
- country = strdup("UNKNOWN"); /* Couldn't find a country so we need to give it one */
- cache->subnet = IP_subnet; /* Save only the network address of the /24 */
- cache->country = strdup(country); /* Copy the country address into the array */
- cache_last = cache; /* Save the last entry in the cache */
- cache_count = 1; /* We have a cache, set this */
- return(cache->country); /* Return the cache country */
- }
- else {
- while(cache->next) { /* Loop the cache looking for a match */
- if(cache->subnet == IP_subnet) {
- return(cache->country); /* We got a match, bail returning the country */
- }
- cache = cache->next; /* No match yet, next entry in the cache */
- }
- /* If we get here, there is no cache entry */
- cache = cache_last; /* Got to the last entry in the cache */
- cache->next = malloc(sizeof(struct cache_array)); /* Allocate some memory */
- cache = cache->next;
- cache->next = NULL; /* Since we're on the last entry, the next entry doesn't exist yet */
- cache_last = cache; /* Record this as being our last entry */
- if(!(country = check_country(IP, geo)))
- country = strdup("UNKNOWN"); /* We couldn't find a match */
- cache->subnet = IP_subnet;
- cache->country = strdup(country);
- return(cache->country);
- }
- };
- struct geo_stat *list_switch(struct geo_stat *l1, struct geo_stat *l2) {
- l1->next = l2->next;
- l2->next = l1;
- return(l2);
- }
- struct geo_stat *sort_list(struct geo_stat *stats) {
- struct geo_stat *top, *p, *q; /* Top of the array, a pointer to position - 1, and a pointer to position */
- int changed = 1;
- top = malloc(sizeof(struct geo_stat)); /* We need an extra entry at the top */
- top->next = stats; /* Start at the beginning of the stats array */
- if(stats && stats->next) {
- while(changed) {
- changed = 0;
- q = top;
- p = top->next;
- while(p->next) {
- if(p->hits < p->next->hits) {
- q->next = list_switch(p, p->next);
- changed = 1;
- }
- q = p;
- if(p->next)
- p = p->next;
- }
- }
- }
- p = top->next;
- free(top);
- return(p);
- };
- void *read_log(struct geo_array *geo, char *logfile) {
- FILE *fHandle;
- char logline[4096];
- char *string_break1;
- char *string_break2;
- char *string_break3;
- char *country;
- struct geo_stat *stats = malloc(sizeof(struct geo_stat));
- struct geo_stat *stats_first = stats;
- struct geo_stat *stats_previous;
- int stat_count = 0;
- int found_hit = 0;
- int linecount = 0;
- time_t start;
- time_t end;
- double seconds;
- char *find_codes_size, *orig_str;
- int count = 0;
- char *record, *record2;
- long total_hits, total_size;
- total_hits = total_size = 0;
- fHandle=fopen(logfile, "r");
- time(&start);
- while(fgets(logline, 4096, fHandle)) {
- /* if(!(linecount)) {
- orig_str = find_codes_size = strdup(logline); /
- record = strtok(find_codes_size, "[");
- record = strtok(NULL, "[");
- record2 = strstr(record, "]");
- record2[0] = '\0';
- printf("Processing logfile %s\nFirst entry at %s\n",logfile,record);
- free(orig_str);
- } */
- // linecount++;
- string_break1 = strstr(logline, " "); /* Next coupla lines break up the log entry */
- string_break1[0] = 0;
- string_break1++;
- string_break2 = strstr(string_break1, " ");
- string_break2[0] = 0;
- string_break2++;
- orig_str = find_codes_size = strdup(string_break2); /* Duplicate the string, we are gonna be screwing with this */
- record = strtok(find_codes_size, " ");
- for(count = 0; count < 8; count++)
- record=strtok(NULL, " ");
- cache = cache_previous = cache_first; /* Reset our cache positioning */
- country = check_cache(string_break1, geo); /* Check the cache to see if we have an entry */
- if(!country)
- country = strdup("UNKNOWN"); /* We didn't have an entry and the IP isn't recognized */
- stats = stats_first; /* Go to the first entry in our stats */
- if(stat_count == 0) { /* There IS no stat array yet, make one */
- stats->country = strdup(country);
- stats->hits = 1;
- stats->size = atol(record);
- stats->next = NULL;
- stat_count++;
- }
- else {
- stat_count = 0;
- while(stats) {
- stat_count++;
- if(!(strcmp(stats->country, country))) {
- stats->hits++;
- stats->size += atol(record);
- found_hit = 1;
- break;
- }
- stats_previous = stats;
- stats = stats->next;
- }
- stats = stats_previous;
- if(found_hit == 0) {
- stat_count++;
- stats->next = malloc(sizeof(struct geo_stat));
- stats = stats->next;
- stats->next = NULL;
- stats->country = strdup(country);
- stats->hits = 1;
- stats->size = atol(record);
- }
- stats = stats->next;
- found_hit = 0;
- }
- free(orig_str);
- }
- stats = stats_first;
- printf("\n");
- stats = sort_list(stats_first);
- stats_first = stats; /* Get the first entry */
- while(stats) {
- total_hits+=stats->hits;
- total_size+=stats->size;
- stats = stats->next;
- } /* Get the totals from the combined array, faster than calculating as we go */
- linecount = total_hits;
- stats = stats_first;
- printf("Displaying list of countries that have more than 5%% of the total hit count\n");
- while(stats) {
- if((((float)stats->hits/(float)linecount)*100) > 5) /* Only print countries that have > 10 % of the hits */
- printf("Country: %s Hits: %d [%.2f%% of total] [%.2fmeg]\n",
- stats->country, stats->hits,
- (((float)stats->hits/(float)linecount)*(float)100), ((float)stats->size/1000000));
- stats = stats->next;
- }
- time(&end);
- seconds = difftime(end, start);
- printf("Processed %lu records in %.f seconds\n",total_hits, seconds);
- printf("Total recorded data transfer: %6.3f gigs\n", (float)total_size/(float)1000000000);
- }
- int main(int argc, char **argv) {
- geo_first = geo_ips = read_geo_db("geo_new.csv");
- read_log(geo_first, argv[1]);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement