Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<iostream>
- #include<fstream>
- #include<sstream>
- #include<list>
- #include<vector>
- #include<cmath>
- //#include<bits/stdc++.h>
- using namespace std;
- const int alphabet_size = 127;
- class Node{
- public:
- //Node *parent;
- Node *children[alphabet_size];
- bool isLeaf;
- int id;
- //vector<int> occurencies;
- Node(){//constructor
- for (int i = 0; i < alphabet_size; i++) {
- children[i] = NULL;
- }
- isLeaf = false;
- }
- };
- class Trie{
- public:
- Node *root;
- Trie(){//constructor
- // root= (Node *) calloc(1, sizeof(Node));
- root= new Node;
- //*root= new Node;
- }
- //void insert(string s,int index);
- void insert(string s, int id);
- Node* search(string s); //searches for string and returns the Node if it's found
- };
- void Trie::insert(string s, int id){
- //int index =
- Node *temp = root;
- for(int i=0; i<s.size();i++){
- int index = s[i];
- if(!temp->children[index])
- temp->children[index]= new Node;
- temp = temp->children[index];
- }
- temp->isLeaf=true;
- temp->id = id;
- }
- Node* Trie::search(string s){
- Node *temp = root;
- for(int i=0; i<s.size();i++){
- int index = s[i];
- if(!temp->children[index]){
- return nullptr; //next letter is not in trie, returns null
- }
- temp = temp->children[index];
- }
- if(temp != NULL)
- return temp; //returns node since got to the end of word and is still in trie
- else{
- return nullptr;
- }
- }
- class movie{
- public:
- int id;
- string title;
- vector<string> genres;
- float rating; //I intend to update the average incrementally: count ++; rating = ((rating*(count-1)) + newuserrating) /count;
- int count;
- void init(int entryid, string entrytitle, vector<string> entrygenres){
- id = entryid;
- title = entrytitle;
- genres = entrygenres;
- rating = 0;
- count = 0;
- }
- };
- class hashtable{
- public:
- vector<movie> *table;
- int size; //ideally choose prime number that is at least 120% the number of entries
- int elements_count;
- int collisions_count;
- hashtable(int sz)
- {
- size = sz;
- table = new vector<movie>[size];
- elements_count = 0;
- collisions_count=0;
- }
- void insert(movie entry){
- //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
- int h = hash(entry.id);
- if(!table[h].empty())
- collisions_count++;
- table[h].push_back(entry);
- elements_count++;
- }
- //using the division method for int entries:
- int hash(int id){
- return id % size;
- }
- movie search(int entry){
- int index = hash(entry);
- movie y;
- y.id = -1;
- for(auto x: table[index])
- {
- if(x.id == entry)
- {
- return x;
- }
- }
- return y; // :]
- }
- void del(int entry){
- int index = hash(entry);
- vector <movie> :: iterator i;
- for (i = table[index].begin();i != table[index].end(); i++) {
- if (i->id == entry)
- break;
- }
- if (i != table[index].end()){
- table[index].erase(i);
- elements_count--;
- }
- }
- void print(){
- //just in case
- for(int i=0;i<size;i++)
- {
- for(auto d: table[i])
- cout<<d.title<<" ";
- cout<<endl;
- }
- }
- void searchmovie(string title){
- cout << title << endl; //TBD, just for testing for now
- }
- void searchuser(string userid){
- cout << userid << endl; //TBD, just for testing for now
- }
- void searchgenre(int N, string genre){
- cout << N << " " << genre << endl; //TBD, just for testing for now
- }
- void searchtag(vector<string> taglist){
- for (auto i: taglist)
- cout << i; //TBD, just for testing for now
- cout << endl;
- }
- };
- class user{
- int coco;
- };
- class tag{
- public:
- user* author;
- movie* film;
- string text;
- };
- class hashtablet{
- public:
- vector<tag> *table;
- int size; //ideally choose prime number that is at least 120% the number of entries
- int elements_count;
- int collisions_count;
- hashtablet(int sz)
- {
- size = sz;
- table = new vector<tag>[size];
- elements_count = 0;
- collisions_count=0;
- }
- void insert(tag entry){
- //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
- int h = hash(entry.text);
- if(!table[h].empty())
- collisions_count++;
- table[h].push_back(entry);
- elements_count++;
- }
- int hash(string s){
- int cont=0;
- int hax=0;
- int ind =0;
- int p=31; //arbitrary value that fits very well when equal to 31.
- for(int i=0;i<s.size();i++)
- {
- hax=int(hax+ int(s[i])*pow(p,cont))%size;
- cont++;
- };
- return hax;
- }
- tag search(string entry){
- int index = hash(entry);
- tag y;
- y.text = nullptr;
- for(auto x: table[index])
- {
- if(x.text == entry)
- {
- return x;
- }
- }
- return y; // :]
- }
- };
- class genre{
- public:
- string text;
- vector<movie*> films;
- };
- class hashtableg{
- public:
- vector<genre> *table;
- int size; //ideally choose prime number that is at least 120% the number of entries
- int elements_count;
- int collisions_count;
- hashtableg(int sz)
- {
- size = sz;
- table = new vector<genre>[size];
- elements_count = 0;
- collisions_count=0;
- }
- void insert(genre entry){
- //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
- int h = hash(entry.text);
- if(!table[h].empty())
- collisions_count++;
- table[h].push_back(entry);
- elements_count++;
- }
- int hash(string s){
- int cont=0;
- int hax=0;
- int ind =0;
- int p=31; //arbitrary value that fits very well when equal to 31.
- for(int i=0;i<s.size();i++)
- {
- hax=int(hax+ int(s[i])*pow(p,cont))%size;
- cont++;
- };
- return hax;
- }
- genre search(string entry){
- int index = hash(entry);
- genre y;
- y.text = nullptr;
- for(auto x: table[index])
- {
- if(x.text == entry)
- {
- return x;
- }
- }
- return y; // :]
- }
- };
- void getmovies(string filename, hashtable* h, hashtableg* g, Trie* t){
- int id;
- string title;
- string genrename;
- vector<string> genres;
- movie entry;
- genre genreitem;
- ifstream fin;
- string line; //to read each line
- fin.open(filename);
- string skip; //used to receive unwanted data
- string aux;
- getline(fin,skip);
- while(fin >> id){ //first data, receives id, and stop if that's
- getline(fin,skip,'"'); //skips past first "
- getline(fin,title,'"'); // reads title
- getline(fin,skip,'"'); //skips past "
- getline(fin,aux,'"'); //gets whole genres string
- stringstream temporary; //ss to parse genres string
- temporary << aux;
- while(getline(temporary,aux,'|')){ //parsing genres between |s
- genres.push_back(aux);
- genreitem = hashtableg->search(aux);
- if(genreitem.text == nullptr)
- }
- getline(fin,skip,'\n'); //you know the drill
- entry.init(id,title,genres); //fills movie object
- h->insert(entry); //inserts it in hashtable
- t->insert(title,id);//insert read data in trie
- genres.clear();
- }
- fin.close();
- }
- void attrating(hashtable* hmovies, float newrating, int movieid){//function called every time a movie rating is readed to assign the value of it's rating.
- int index, cont=0, aux=0, c;
- index = hmovies->hash(movieid);
- for(auto x: hmovies->table[index])
- {
- if(x.id == movieid)
- {
- hmovies->table[index][cont].count++;
- c= hmovies->table[index][cont].count;
- aux = hmovies->table[index][cont].rating;
- hmovies->table[index][cont].rating= (aux*(c-1)+newrating)/c; //rating = ((rating*(count-1)) + newuserrating) /count;
- }
- cont++;
- }
- }
- void getrating(string filename, hashtable* hratings, hashtable* hmovies){
- int userid, movieid;
- float rating;
- string aux;
- char ignore;
- ifstream fin;
- fin.open(filename);
- getline(fin, aux);
- while(fin >> userid){
- fin>>ignore;//ignore the commas
- fin >> movieid;
- fin>>ignore;
- fin>>rating;
- attrating(hmovies, rating, movieid);
- getline(fin, aux);
- }
- fin.close();
- }
- void prefixsearch(Node* root, hashtable h){
- if(root->isLeaf){ //stop condition, prints stuff on screen
- movie movieinfo = h.search(root->id);
- cout << root->id << " : " << movieinfo.title << " : ";
- int count = 0;
- for(auto x: movieinfo.genres){
- if(count)
- cout << "|";
- cout << x;
- count++;
- }
- cout << " : " << movieinfo.rating << " : " << movieinfo.count << endl;
- }
- else{ //else tries on children (just like ww2 german experiments)
- for(int i = 0; i < alphabet_size; i++)
- if(root->children[i])
- prefixsearch(root->children[i], h);
- }
- }
- int main(void){
- hashtable h(5501);
- hashtable h1(27281);
- Trie t;
- getmovies("movie.csv",&h,&t);
- getrating("minirating.csv",&h1, &h);
- cout << "\nDataset MovieLens 20M - Use 'help' to show commands.\n";
- cout << "We do not store your data\n";
- int quit = 0;
- string input;
- while(!quit){
- cout << ">";
- getline(cin,input);
- istringstream iss(input);
- iss >> input;
- if (input == "help"){
- cout << "movie <title or prefix> - Movies by title or prefix.\n";
- cout << "user <userID> - Reviews made by user from given userID.\n";
- cout << "top<N> '<genre>' - Top N movies by rating in given genre (only movies with at least 1000 reviews are shown).\n";
- cout << "tags <list of tags> - Movies having the given tags (write tags inside apostrophes).\n";
- cout << "quit - Quits program.\n";
- }
- else if (input == "movie"){
- string skip;
- getline(iss,skip,' '); //skip space
- getline(iss,input); //gets full movie name written
- Node *query = t.search(input); //searches for stuff written
- if(query != nullptr){ //if it found something
- cout << "movieid:title:genres:rating:count" << endl << endl;
- if(query->isLeaf){
- movie movieinfo = h.search(query->id);
- cout << query->id << " : " << movieinfo.title << " : ";
- int count = 0;
- for(auto x: movieinfo.genres){
- if(count)
- cout << "|";
- cout << x;
- count++;
- }
- cout << " : " << movieinfo.rating << " : " << movieinfo.count << endl;
- }
- else
- prefixsearch(query,h);
- }
- else
- cout << "No results were found." << endl;
- }
- else if (input == "user"){
- iss >> input;
- h.searchuser(input);
- }
- else if (input.substr(0,3) == "top"){
- int N = stoi(input.substr(3,-1)); //gets stuff after 'top' from string into int
- iss >> input;
- if(input[0] != '\'' && input[-1] != '\'') //checks for apostrophes
- cout << "Genre not recognised. Remember to write it between apostrophes.\n";
- else{
- h.searchgenre(N,input.substr(1,input.length()-2)); //searches removing them
- }
- }
- else if (input == "tags"){
- vector<string> tags;
- string skip;
- string aux;
- getline(iss,skip,'\'');
- while(getline(iss,aux,'\''))
- tags.push_back(aux); //separates between apostrophes and removes them
- if(!tags.empty())
- h.searchtag(tags);
- else
- cout << "Tags not recognised. Remember to write them between apostrophes.\n";
- }
- else if (input == "quit")
- quit = 1;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement