Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<iostream>
- #include<fstream>
- #include<sstream>
- #include<list>
- #include<vector>
- #include<cmath>
- #include<utility>
- //#include<bits/stdc++.h>
- using namespace std;
- const int alphabet_size = 127;
- class Node{
- public:
- //Node *parent;
- Node *children[alphabet_size];
- bool isLeaf;
- int id;
- //vector<int> occurencies;
- Node(){//constructor
- for (int i = 0; i < alphabet_size; i++) {
- children[i] = NULL;
- }
- isLeaf = false;
- }
- };
- class Trie{
- public:
- Node *root;
- Trie(){//constructor
- // root= (Node *) calloc(1, sizeof(Node));
- root= new Node;
- //*root= new Node;
- }
- //void insert(string s,int index);
- void insert(string s, int id);
- Node* search(string s); //searches for string and returns the Node if it's found
- };
- void Trie::insert(string s, int id){
- //int index =
- Node *temp = root;
- for(int i=0; i<s.size();i++){
- int index = s[i];
- if(!temp->children[index])
- temp->children[index]= new Node;
- temp = temp->children[index];
- }
- temp->isLeaf=true;
- temp->id = id;
- }
- Node* Trie::search(string s){
- Node *temp = root;
- for(int i=0; i<s.size();i++){
- int index = s[i];
- if(!temp->children[index]){
- return nullptr; //next letter is not in trie, returns null
- }
- temp = temp->children[index];
- }
- if(temp != NULL)
- return temp; //returns node since got to the end of word and is still in trie
- else{
- return nullptr;
- }
- }
- class movie{
- public:
- int id;
- string title;
- vector<string> genres;
- float rating; //I intend to update the average incrementally: count ++; rating = ((rating*(count-1)) + newuserrating) /count;
- int count;
- void init(int entryid, string entrytitle, vector<string> entrygenres){
- id = entryid;
- title = entrytitle;
- genres = entrygenres;
- rating = 0;
- count = 0;
- }
- };
- class user{
- public:
- int id;
- vector<pair<int, float>> v;
- //movie id, user rating
- };
- class hashtableu{
- public:
- user *table;
- int size;
- int elements_count;
- int collisions_count;
- hashtableu(int sz)
- {
- this->size = sz;
- table = new user[size];
- elements_count = 0;
- collisions_count=0;
- }
- void insert(int userid, int movieid, float rating){
- int index;
- index = hash(userid);
- //user u;
- //if(u.v.empty())
- // u.id = userid;
- //pair<string,float> mr;
- //mr = makepair(filmtitle,rating);
- //u.push_back(mr);
- //table[index].push_back(u);
- table[index].v.push_back(make_pair(movieid, rating));
- }
- int hash(int id){
- return id % size;
- }
- };
- class hashtablem{//hash table só que de filme kk
- public:
- vector<movie> *table;
- int size; //ideally choose prime number that is at least 120% the number of entries
- int elements_count;
- int collisions_count;
- hashtablem(int sz)
- {
- size = sz;
- table = new vector<movie>[size];
- elements_count = 0;
- collisions_count=0;
- }
- void insert(movie entry){
- //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
- int h = hash(entry.id);
- if(!table[h].empty())
- collisions_count++;
- table[h].push_back(entry);
- elements_count++;
- }
- //using the division method for int entries:
- int hash(int id){
- return id % size;
- }
- movie search(int entry){
- int index = hash(entry);
- movie y;
- y.id = -1;
- for(auto x: table[index])
- {
- if(x.id == entry)
- {
- return x;
- }
- }
- return y; // :]
- }
- void del(int entry){
- int index = hash(entry);
- vector <movie> :: iterator i;
- for (i = table[index].begin();i != table[index].end(); i++) {
- if (i->id == entry)
- break;
- }
- if (i != table[index].end()){
- table[index].erase(i);
- elements_count--;
- }
- }
- void print(){
- //just in case
- for(int i=0;i<size;i++)
- {
- for(auto d: table[i])
- cout<<d.title<<" ";
- cout<<endl;
- }
- }
- };
- void getmovies(string filename, hashtablem* h, Trie* t){
- int id;
- string title;
- string genre;
- vector<string> genres;
- movie entry;
- ifstream fin;
- string line; //to read each line
- fin.open(filename);
- string skip; //used to receive unwanted data
- string aux;
- getline(fin,skip);
- while(fin >> id){ //first data, receives id, and stop if that's
- getline(fin,skip,'"'); //skips past first "
- getline(fin,title,'"'); // reads title
- getline(fin,skip,'"'); //skips past "
- getline(fin,aux,'"'); //gets whole genres string
- stringstream temporary; //ss to parse genres string
- temporary << aux;
- while(getline(temporary,aux,'|')) //parsing genres between |s
- genres.push_back(aux);
- getline(fin,skip,'\n'); //you know the drill
- entry.init(id,title,genres); //fills movie object
- h->insert(entry); //inserts it in hashtablem
- t->insert(title,id);//insert read data in trie
- genres.clear();
- }
- fin.close();
- }
- void attrating(hashtablem* hmovies, float newrating, int movieid){//function called every time a movie rating is readed to assign the value of it's rating.
- int index, cont=0, aux=0, c;
- index = hmovies->hash(movieid);
- for(auto x: hmovies->table[index])
- {
- if(x.id == movieid)
- {
- hmovies->table[index][cont].count++;
- c= hmovies->table[index][cont].count;
- aux = hmovies->table[index][cont].rating;
- hmovies->table[index][cont].rating= (aux*(c-1)+newrating)/c; //rating = ((rating*(count-1)) + newuserrating) /count;
- }
- cont++;
- }
- }
- void getrating(string filename, hashtableu* husers, hashtablem* hmovies){
- int userid, movieid;
- float rating;
- movie m;
- string aux;
- char ignore;
- ifstream fin;
- fin.open(filename);
- getline(fin, aux);
- while(fin >> userid){
- fin>>ignore;//ignore the commas
- fin >> movieid;
- fin>>ignore;
- //m = hmovies->search(movieid);
- fin>>rating;
- attrating(hmovies, rating, movieid);
- husers->insert(userid, movieid, rating);
- getline(fin, aux);
- }
- fin.close();
- }
- void prefixsearch(Node* root, hashtablem h){
- if(root->isLeaf){ //stop condition, prints stuff on screen
- movie movieinfo = h.search(root->id);
- cout << root->id << " : " << movieinfo.title << " : ";
- int count = 0;
- for(auto x: movieinfo.genres){
- if(count)
- cout << "|";
- cout << x;
- count++;
- }
- cout << " : " << movieinfo.rating << " : " << movieinfo.count << endl;
- }
- else{ //else tries on children (just like ww2 german experiments)
- for(int i = 0; i < alphabet_size; i++)
- if(root->children[i])
- prefixsearch(root->children[i], h);
- }
- }
- int main(void){
- hashtablem h(5501); //movies
- hashtableu h1(27281); //users
- Trie t;
- movie m;
- getmovies("movie.csv",&h,&t);
- getrating("minirating.csv",&h1, &h);
- cout << "\nDataset MovieLens 20M - Use 'help' to show commands.\n";
- int quit = 0;
- string input;
- while(!quit){
- cout << ">";
- getline(cin,input);
- istringstream iss(input);
- iss >> input;
- if (input == "help"){
- cout << "movie <title or prefix> - Movies by title or prefix.\n";
- cout << "user <userID> - Reviews made by user from given userID.\n";
- cout << "top<N> '<genre>' - Top N movies by rating in given genre (only movies with at least 1000 reviews are shown).\n";
- cout << "tags <list of tags> - Movies having the given tags (write tags inside apostrophes).\n";
- cout << "quit - Quits program.\n";
- }
- else if (input == "movie"){
- string skip;
- getline(iss,skip,' '); //skip space
- getline(iss,input); //gets full movie name written
- Node *query = t.search(input); //searches for stuff written
- if(query != nullptr){ //if it found something
- cout << "movieid:title:genres:rating:count" << endl << endl;
- if(query->isLeaf){
- movie movieinfo = h.search(query->id);
- cout << query->id << " : " << movieinfo.title << " : ";
- int count = 0;
- for(auto x: movieinfo.genres){
- if(count)
- cout << "|";
- cout << x;
- count++;
- }
- cout << " : " << movieinfo.rating << " : " << movieinfo.count << endl;
- }
- else
- prefixsearch(query,h);
- }
- else
- cout << "No results were found." << endl;
- }
- else if (input == "user"){
- iss >> input;
- iss >> input;
- int N = stoi(input); //gets stuff after 'top' from string into int
- int index = h1.hash(N);
- int auxr=0;
- for(int i=0; i<h1.table[index].v.size();i++){
- auxr = h1.table[index].v[i].first;
- m = h.search(auxr);
- cout<<"user rating: "<<h1.table[index].v[i].second;
- cout<<" movie title: "<<m.title;
- cout<<" global rating: "<<m.rating;
- cout<<" global count: "<<m.count<<endl;
- }
- }
- else if (input.substr(0,3) == "top"){
- int N = stoi(input.substr(3,-1)); //gets stuff after 'top' from string into int
- iss >> input;
- if(input[0] != '\'' && input[-1] != '\'') //checks for apostrophes
- cout << "Genre not recognised. Remember to write it between apostrophes.\n";
- else{
- //h.searchgenre(N,input.substr(1,input.length()-2)); //searches removing them
- }
- }
- else if (input == "tags"){
- vector<string> tags;
- string skip;
- string aux;
- getline(iss,skip,'\'');
- while(getline(iss,aux,'\''))
- tags.push_back(aux); //separates between apostrophes and removes them
- if(!tags.empty())
- cout<<"oi"<<endl;
- else
- cout << "Tags not recognised. Remember to write them between apostrophes.\n";
- }
- else if (input == "quit")
- quit = 1;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement