Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <string>
- #include <vector>
- #include <sstream>
- using namespace std;
- /*!!! Just before pasting this code to pastebin it failed the last test once randomly */
- //Slightly modified djb2 hashing function.
- unsigned long int get_hash(const string &extension, const vector<vector<vector<string>>> &table){
- unsigned long int index = 5381;
- for(int i = 0; i < extension.length(); i++){
- index = (((index << 7) + index) + extension[i]);
- }
- index = index % table.size();
- return index;
- }
- //Inserts the extension and media type into the hash table.
- void insert_hashed(const string &extension, const string &MT, vector<vector<vector<string>>> &table){
- unsigned long int index = get_hash(extension, table);
- table[index].push_back({extension, MT});
- }
- //Looks for a media type using the extension as key. If found returns the media type, if not returns "UNKNOWN".
- string find_hashed(const string &extension, const vector<vector<vector<string>>> &table){
- unsigned long int index = get_hash(extension, table);
- int i = 0;
- int size = table[index].size();
- while(i != size && extension != table[index][i][0]){
- i++;
- }
- if(i == size){
- return "UNKNOWN";
- }
- return table[index][i][1];
- }
- //Converts a string to lower case.
- void to_lower(string &str){
- for(int i = 0; i < str.length(); i++){
- str[i] = (char)tolower((unsigned char)str[i]);
- }
- }
- //Debug function. Used to test fitness of the hashing function. COMMENT it's use in main() for full performance
- void test(vector<vector<vector<string>>> &table){
- int size = table.size();
- int buckets = 0;
- int min_bucket = table[0].size();
- int max_bucket = 0;
- int bucket_size = 0;
- for(int i = 0; i < size; i++){
- bucket_size = table[i].size();
- if(bucket_size > 0){
- buckets++;
- if(bucket_size > max_bucket){
- max_bucket = bucket_size;
- }
- }
- if(bucket_size < min_bucket){
- min_bucket = bucket_size;
- }
- }
- cerr << "Buckets: " << buckets << "\n" <<
- "Empty indices: " << size - buckets << "\n" <<
- "Max size of bucket: " << max_bucket << "\n" <<
- "Min size of bucket: " << min_bucket << "\n" <<
- "Average fill of buckets(load): " << float((float)buckets/(float)size) << endl;
- }
- int main()
- {
- int N; // Number of elements which make up the association table.
- cin >> N; cin.ignore();
- int Q; // Number Q of file names to be analyzed.
- cin >> Q; cin.ignore();
- vector<vector<vector<string>>> table (N, vector<vector<string>>()); // The hash table.
- string EXT; // File extension.
- string MT; // MIME type.
- for (int i = 0; i < N; i++) {
- cin >> EXT >> MT; cin.ignore();
- to_lower(EXT);
- insert_hashed(EXT, MT, table);
- }
- test(table); // COMMENT this line for full performance!
- stringstream out_stream; // Stream to store all the output messages.
- string FNAME;
- string extension;
- for (int i = 0; i < Q; i++) {
- getline(cin, FNAME); // One file name per line.
- extension = FNAME.substr(FNAME.find_last_of('.') + 1); // Extraction of the extension
- if(!FNAME.compare(extension)){ // If FNAME and extension are the same then there was no '.' so the FNAME has no extension.
- //cout << "UNKNOWN\n"; // Swapped for the stringstream method.
- out_stream << "UNKNOWN\n";
- } else{
- to_lower(extension);
- //cout << find_hashed(extension, table) << "\n"; // Swapped for the stringstream method.
- out_stream << find_hashed(extension, table) << "\n";
- }
- /*You can swap around the commenting of stringstream and cout in the above section
- and comment the cout below to see the Large dataset fail while outputting the whole answer.*/
- }
- cout << out_stream.str() << endl; // Prints all output messages at once saving some time.
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement