Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<iostream>
- #include<fstream>
- #include<sstream>
- #include<list>
- #include<vector>
- #include<cmath>
- #include<bits/stdc++.h>
- using namespace std;
// Number of child slots per trie node. A character's code is used directly as
// the slot index, so only codes 0 .. alphabet_size-1 can be represented.
const int alphabet_size = 127;

/// One node of the title trie.
///
/// All members start in a defined state: no children, not a terminal node,
/// and id 0. Previously `id` was left uninitialized, so reading it on a node
/// where no string ends yielded an indeterminate value.
class Node {
public:
    Node *children[alphabet_size] = {};  // one slot per character code; nullptr when absent
    bool isLeaf = false;                 // true when an inserted string ends at this node
    int id = 0;                          // id recorded for the string ending here; 0 until set

    Node() = default;
};
- class Trie{
- public:
- Node *root;
- Trie(){//constructor
- // root= (Node *) calloc(1, sizeof(Node));
- root= new Node;
- //*root= new Node;
- }
- //void insert(string s,int index);
- void insert(string s, int id);
- int search(string s, Node *temp);
- };
- void Trie::insert(string s, int id){
- //int index =
- Node *temp = root;
- for(int i=0; i<s.size();i++){
- int index = s[i];
- if(!temp->children[index])
- temp->children[index]= new Node;
- temp = temp->children[index];
- }
- temp->isLeaf=true;
- temp->id = id;
- }
- int Trie::search(string s, Node *temp){
- temp = root;
- for(int i=0; i<s.size();i++){
- int index = s[i];
- if(!temp->children[index])
- return 0;
- temp = temp->children[index];
- }
- return temp->id;
- }
/// A movie record: id, title, genre list, and rating statistics.
class movie {
public:
    int id = 0;
    std::string title;
    std::vector<std::string> genres;
    // Planned to be maintained as an incremental mean:
    //   count++; rating = ((rating * (count - 1)) + newuserrating) / count;
    float rating = 0.0f;
    int count = 0;

    /// Fills in the record and resets the rating statistics to zero.
    ///
    /// @param entryid      movie id
    /// @param entrytitle   movie title
    /// @param entrygenres  genres of the movie
    void init(int entryid, std::string entrytitle, std::vector<std::string> entrygenres) {
        id = entryid;
        title = std::move(entrytitle);
        genres = std::move(entrygenres);
        // Assign the members; declaring locals with these names would shadow
        // them and leave the members unset.
        rating = 0.0f;
        count = 0;
    }
};
- class hashtable{
- public:
- list<movie> *table;
- int size; //ideally choose prime number that is at least 120% the number of entries
- int elements_count;
- int collisions_count;
- hashtable(int sz)
- {
- size = sz;
- table = new list<movie>[size];
- elements_count = 0;
- collisions_count=0;
- }
- void insert(movie entry){
- //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
- int h = hash(entry.id);
- if(!table[h].empty())
- collisions_count++;
- table[h].push_back(entry);
- elements_count++;
- }
- //using the division method for int entries:
- int hash(int id){
- return id % size;
- }
- movie search(int entry){
- int index = hash(entry);
- int cont=0;
- movie y;
- for(auto x: table[index])
- {
- cont++;
- if(x.id == entry)
- {
- return x;
- }
- }
- return y;
- }
- void del(int entry){
- int index = hash(entry);
- list <movie> :: iterator i;
- for (i = table[index].begin();i != table[index].end(); i++) {
- if (i->id == entry)
- break;
- }
- if (i != table[index].end()){
- table[index].erase(i);
- elements_count--;
- }
- }
- /*
- void del(int entry){
- int index = hash(entry);
- for(auto x: table[index])
- {
- if(x.id == entry)
- {
- table[index].erase(x);
- elements_count--;
- break;
- }
- }
- }
- */
- void print(){
- //just in case
- for(int i=0;i<size;i++)
- {
- for(auto d: table[i])
- cout<<d.title<<" ";
- cout<<endl;
- }
- }
- void searchmovie(string title){
- cout << title << endl; //TBD, just for testing for now
- }
- void searchuser(string userid){
- cout << userid << endl; //TBD, just for testing for now
- }
- void searchgenre(int N, string genre){
- cout << N << " " << genre << endl; //TBD, just for testing for now
- }
- void searchtag(vector<string> taglist){
- for (auto i: taglist)
- cout << i; //TBD, just for testing for now
- cout << endl;
- }
- };
// Placeholder type: it declares no members and nothing in the visible code uses it.
class data {};
- void getmovies(string filename, hashtable* h, Trie* t){
- int id;
- string title;
- string genre;
- vector<string> genres;
- movie entry;
- ifstream fin;
- string line; //to read each line
- fin.open(filename);
- string skip;
- string aux;
- getline(fin,skip);
- while(fin >> id){
- getline(fin,skip,'"');
- getline(fin,title,'"');
- getline(fin,skip,'"');
- getline(fin,aux,'"');
- stringstream temporary;
- temporary << aux;
- while(getline(temporary,aux,'|'))
- genres.push_back(aux);
- getline(fin,skip,'\n');
- entry.init(id,title,genres);
- h->insert(entry);
- t->insert(title,id);
- genres.clear();
- }
- fin.close();
- }
- int main(void){
- hashtable h(27281);
- Trie t;
- getmovies("movie.csv",&h,&t);
- cout << "\nDataset MovieLens 20M - Use 'help' to show commands.\n";
- int quit = 0;
- string input;
- while(!quit){
- cout << ">";
- getline(cin,input);
- istringstream iss(input);
- iss >> input;
- if (input == "help"){
- cout << "movie <title or prefix> - Movies by title or prefix.\n";
- cout << "user <userID> - Reviews made by user from given userID.\n";
- cout << "top<N> '<genre>' - Top N movies by rating in given genre (only movies with at least 1000 reviews are shown).\n";
- cout << "tags <list of tags> - Movies having the given tags (write tags inside apostrophes).\n";
- cout << "quit - Quits program.\n";
- }
- else if (input == "movie"){
- Node looked;
- movie hashlooked;
- string skip;
- getline(iss,skip,' ');
- getline(iss,input);
- cout << "movieid title genres rating count" << endl;
- if(t.search(input,&looked)){
- cout << looked.id;
- hashlooked = h.search(looked.id);
- //cout << looked.id << " " << hashlooked.title << " ";
- int count = 0;
- for(auto x: hashlooked.genres){
- if(count)
- cout << "|";
- cout << x;
- }
- //cout << " " << hashlooked.rating << " " << hashlooked.count;
- }
- }
- else if (input == "user"){
- iss >> input;
- h.searchuser(input);
- }
- else if (input.substr(0,3) == "top"){
- int N = stoi(input.substr(3,-1));
- iss >> input;
- if(input[0] != '\'' && input[-1] != '\'')
- cout << "Genre not recognised. Remember to write it between apostrophes.\n";
- else{
- h.searchgenre(N,input.substr(1,input.length()-2));
- }
- }
- else if (input == "tags"){
- vector<string> tags;
- string skip;
- string aux;
- getline(iss,skip,'\'');
- while(getline(iss,aux,'\''))
- tags.push_back(aux);
- if(!tags.empty())
- h.searchtag(tags);
- else
- cout << "Tags not recognised. Remember to write them between apostrophes.\n";
- }
- else if (input == "quit")
- quit = 1;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement