Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <cstring>
- #include <fstream>
- #include <ctime>
- #include <cstdio>
- #include <cstdlib>
- using namespace std;
// Size in bytes of the buffer used for each buffered read of an input file.
static const size_t g_junk_size = 4096;

// Number of ints backing the trie.  Same value as the original expression
// g_junk_size * 3072, spelled as an enum so it is a constant expression.
enum { g_dictionary_capacity = 4096 * 3072 };

// Flat storage for the word trie.  The trie is addressed by int offsets into
// this array:
//   * every node starts at a multiple of 64; the root is at offset 0;
//   * slot (letter - 'A') of a node holds the offset of the child node for
//     that letter, or 0 when there is no child ('A'..'Z' use slots 0..25,
//     'a'..'z' use slots 32..57);
//   * slot 26 of a node holds how many times the word ending at that node
//     has been seen.
// Being static, the array starts out zero-filled, i.e. "no children, no words".
static int g_dictionary_data[ g_dictionary_capacity ];

// Not referenced by the code visible in this file.
static int g_dictionaries = 0;

// Number of trie nodes allocated so far, not counting the root; the next new
// node is placed at offset (g_dictionary_count + 1) * 64.
static int g_dictionary_count = 0;

// Running totals accumulated over every input file that could be opened.
static size_t total_byte_count = 0;
static size_t total_word_count = 0;
static size_t total_line_count = 0;

// Prints every word stored in the trie below node `dindex`, one per line,
// preceded by its occurrence count.  `buf` holds the `buflen` letters on the
// path from the root to `dindex` and is used as scratch space for the
// letters of deeper nodes.
void dump_dictionary( int dindex, char buf[], int buflen );
- //------------------------------------------------------------------------------
- int main( int argc, char *argv[] )
- {
- const clock_t start = clock();
- for( int idx = 1; idx != argc; ++idx )
- {
- FILE *file_handle = fopen( argv[idx], "rb" );
- if( !file_handle )
- continue;
- fseek( file_handle, 0L, SEEK_END );
- size_t byte_count = ftell( file_handle );
- fseek( file_handle, 0L, SEEK_SET );
- size_t dindex = 0;
- //buffered read
- char *buf = (char *)malloc( g_junk_size * sizeof(char) );
- if( !buf )
- {
- printf( "Not enough memory!!!\n" );
- return 1;
- }
- for( int bytes_left = (int)byte_count; bytes_left > 0; bytes_left-= g_junk_size )
- {
- size_t len = fread( buf, 1, g_junk_size, file_handle );
- size_t word_count = 0;
- size_t line_count = 0;
- int *dictionary_data = g_dictionary_data;
- int dictionary_count = g_dictionary_count;
- for( size_t idx = 0; idx != len; ++idx )
- {
- int c = buf[ idx ];
- if( ( 'a' <= c && c <= 'z') || ( 'A' <= c && c <= 'Z' ) )
- {
- int index = ( c - 'A') + dindex;
- dindex = dictionary_data[index];
- if( !dindex )
- {
- dictionary_data[index] = (++dictionary_count) * 64;
- dindex = dictionary_data[index];
- }
- }
- else
- {
- if( c == '\n' )
- ++line_count;
- if( dindex )
- {
- ++word_count;
- ++dictionary_data[ dindex + 26 ];
- dindex = 0;
- }
- }
- }
- total_word_count += word_count;
- total_line_count += line_count;
- g_dictionary_count = dictionary_count;
- }
- total_byte_count += byte_count;
- free( buf );
- fclose( file_handle );
- }
- const clock_t end = clock();
- printf( "Lines\tWords\tBytes\n");
- printf( "-------------------------------------\n" );
- printf( "%ld\t%ld\t%ld\tTotal\n", total_line_count, total_word_count, total_byte_count );
- printf( "-------------------------------------\n" );
- if( argc > 1 )
- {
- char buff[1024];
- dump_dictionary( 0, buff, 0 );
- }
- printf( "Time: %.0fms\n", (float)(end - start) / (float)CLOCKS_PER_SEC * (float)1000 );
- return 0;
- }
- void dump_dictionary( int dindex, char buf[], int buflen )
- {
- if( g_dictionary_data[ dindex + 26 ] != 0 )
- {
- char word[1024];
- strncpy( word, buf, buflen );
- word[buflen + 1] = 0;
- printf( "%d\t %s\n", g_dictionary_data[dindex + 26], word);
- }
- for( int idx = 0; idx != 64; ++idx )
- {
- if( g_dictionary_data[dindex + idx] && (idx != 26) )
- {
- buf[buflen] = (char)('A' + (idx) );
- dump_dictionary( g_dictionary_data[dindex + idx], buf, buflen + 1 );
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement