Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <fstream>
- #include <iostream>
- #include <string>
- #include <vector>
- using namespace std;
// Modulus applied to every hash value: JSHash() returns results in
// the range [0, hashRestriction).
constexpr int hashRestriction = 10;

// Input stream for the text file being analysed; it is opened in main().
std::ifstream fileToRead;
/// Erases the element at position `index` from `vec`.
///
/// @param vec    Vector to modify in place.
/// @param index  Zero-based position of the element to erase.
///
/// An index outside [0, vec.size()) is ignored: erasing through an
/// out-of-range iterator would be undefined behaviour.
void remove(std::vector<std::string>& vec, int index)
{
    if (index < 0 || static_cast<std::size_t>(index) >= vec.size()) {
        return;
    }
    // Vector iterators are random-access, so plain arithmetic is enough.
    vec.erase(vec.begin() + index);
}
/// Removes duplicate strings from `words` in place, keeping the first
/// occurrence of every distinct string and preserving their order.
///
/// @param words  Vector to deduplicate; an empty vector is left untouched.
///
/// The unique elements are compacted into the front of the vector and the
/// tail is dropped afterwards, so no element is skipped when several equal
/// strings follow each other.
void removeSames(std::vector<std::string>& words) {
    std::size_t kept = 0;  // words[0, kept) holds the unique strings found so far
    for (std::size_t i = 0; i < words.size(); ++i) {
        bool alreadySeen = false;
        for (std::size_t k = 0; k < kept; ++k) {
            if (words[k] == words[i]) {
                alreadySeen = true;
                break;
            }
        }
        if (alreadySeen) {
            continue;
        }
        if (kept != i) {
            // Slot `kept` only contains an already-processed value, so
            // swapping it out loses nothing.
            words[kept].swap(words[i]);
        }
        ++kept;
    }
    words.resize(kept);
}
/// Counts how many elements of `words` are equal to `toSearch`.
///
/// @param words     Strings to inspect.
/// @param toSearch  Value compared against every element.
/// @return Number of exact matches (0 when there are none).
int count(std::vector<std::string> words, std::string toSearch) {
    int matches = 0;
    for (std::size_t pos = 0; pos < words.size(); ++pos) {
        matches += (words[pos] == toSearch) ? 1 : 0;
    }
    return matches;
}
- class hashTable {
- private:
- vector<vector<string>> words;
- int lines = hashRestriction+1;
- public:
- void create() {
- for (int i = 0; i < lines; i++)
- {
- words.push_back(vector<string>());
- }
- }
- void add(int hash, string word) {
- int line = hash;
- words[line].push_back(word);
- }
- void listCollision() {
- bool hasFound = false;
- for (int i = 0; i < lines; i++) {
- if (words[i].size() > 0) {
- vector<string> reccuring;
- for (int j = 0; j < words[i].size(); j++) {
- reccuring.push_back(words[i][j]);
- }
- removeSames(reccuring);
- if (reccuring.size() > 1) {
- hasFound = true;
- cout << "Reccuring for hash " << i << " was found:" << endl;
- for (auto word : reccuring) {
- int c = count(words[i], word);
- if (c > 1) {
- cout << "Found coinsidences of word \"" << word << "\" for this hash: " << c << endl;
- }
- else {
- cout << "Found collision for this hash: " << word << endl;
- }
- }
- }
- else {
- cout << "Hash " << i << " is only for word \"" << words[i][0] << "\"" << endl;
- }
- cout << endl;
- }
- }
- if (!hasFound) {
- cout << "Reccuring words' hashes were not found" << endl;
- }
- }
- };
/// Returns a copy of `str` with every punctuation/space character from the
/// set ".,;:!?& " removed.
///
/// @param str  Text to clean.
/// @return The remaining characters in their original order.
///
/// The result is built by filtering rather than by erasing in place, so
/// consecutive restricted characters (e.g. "a,,b" or "?!x") are all removed.
std::string purify(std::string str) {
    static const std::string restricted = ".,;:!?& ";
    std::string cleaned;
    cleaned.reserve(str.size());
    for (const char ch : str) {
        if (restricted.find(ch) == std::string::npos) {
            cleaned += ch;
        }
    }
    return cleaned;
}
/// Builds a new vector in which every string of `words` is reversed
/// character by character; the order of the strings themselves is kept.
///
/// @param words  Strings to reverse.
/// @return One reversed string per input string.
std::vector<std::string> reverse(std::vector<std::string> words) {
    std::vector<std::string> flipped;
    for (const std::string& original : words) {
        // Constructing from reverse iterators yields the mirrored string.
        flipped.push_back(std::string(original.rbegin(), original.rend()));
    }
    return flipped;
}
- vector<string> split(vector<string> text) {
- vector<string> words;
- for (auto str : text) {
- str += ' ';
- while (str.find(' ') != string::npos) {
- words.push_back(purify(str.substr(0, str.find(' '))));
- str.erase(0, str.find(' ') + 1);
- }
- }
- return words;
- }
- unsigned int JSHash(const std::string& str)
- {
- unsigned int hash = 1315423911;
- for (std::size_t i = 0; i < str.length(); i++)
- {
- hash ^= ((hash << 5) + str[i] + (hash >> 2));
- }
- return hash % hashRestriction;
- }
- int main()
- {
- char fnamer[1000] = "";
- struct stat buff;
- do {
- fileToRead.close();
- cout << "Enter full path to the fileToRead" << endl;
- cin >> fnamer;
- fileToRead.open(fnamer);
- } while (stat(fnamer, &buff) != 0 || !fileToRead);
- vector<string> dataBase;
- string str;
- while (getline(fileToRead, str)) {
- dataBase.push_back(str);
- }
- for (auto word : dataBase) {
- cout << word << endl;
- }
- vector<string> words = reverse(split(dataBase));
- hashTable table;
- table.create();
- for (auto word : words) {
- table.add(JSHash(word), word);
- }
- table.listCollision();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement