Advertisement
Guest User

tudobugado

a guest
Dec 13th, 2019
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.27 KB | None | 0 0
  1. #include<iostream>
  2. #include<fstream>
  3. #include<sstream>
  4. #include<list>
  5. #include<vector>
  6. #include<cmath>
  7. //#include<bits/stdc++.h>
  8.  
  9. using namespace std;
  10. const int alphabet_size = 127;
  11.  
  12. class Node{
  13. public:
  14. //Node *parent;
  15. Node *children[alphabet_size];
  16. bool isLeaf;
  17. int id;
  18. //vector<int> occurencies;
  19. Node(){//constructor
  20. for (int i = 0; i < alphabet_size; i++) {
  21. children[i] = NULL;
  22. }
  23. isLeaf = false;
  24. }
  25. };
  26.  
  27. class Trie{
  28. public:
  29. Node *root;
  30.  
  31. Trie(){//constructor
  32. // root= (Node *) calloc(1, sizeof(Node));
  33. root= new Node;
  34. //*root= new Node;
  35. }
  36.  
  37. //void insert(string s,int index);
  38. void insert(string s, int id);
  39. Node* search(string s); //searches for string and returns the Node if it's found
  40. };
  41.  
  42. void Trie::insert(string s, int id){
  43. //int index =
  44. Node *temp = root;
  45. for(int i=0; i<s.size();i++){
  46. int index = s[i];
  47. if(!temp->children[index])
  48. temp->children[index]= new Node;
  49. temp = temp->children[index];
  50. }
  51. temp->isLeaf=true;
  52. temp->id = id;
  53. }
  54.  
  55. Node* Trie::search(string s){
  56. Node *temp = root;
  57. for(int i=0; i<s.size();i++){
  58. int index = s[i];
  59. if(!temp->children[index]){
  60. return nullptr; //next letter is not in trie, returns null
  61. }
  62. temp = temp->children[index];
  63. }
  64. if(temp != NULL)
  65. return temp; //returns node since got to the end of word and is still in trie
  66. else{
  67. return nullptr;
  68. }
  69. }
  70.  
  71.  
  72. class movie{
  73. public:
  74. int id;
  75. string title;
  76. vector<string> genres;
  77. float rating; //I intend to update the average incrementally: count ++; rating = ((rating*(count-1)) + newuserrating) /count;
  78. int count;
  79.  
  80. void init(int entryid, string entrytitle, vector<string> entrygenres){
  81. id = entryid;
  82. title = entrytitle;
  83. genres = entrygenres;
  84. rating = 0;
  85. count = 0;
  86. }
  87. };
  88.  
  89. class hashtable{
  90. public:
  91.  
  92. vector<movie> *table;
  93. int size; //ideally choose prime number that is at least 120% the number of entries
  94. int elements_count;
  95. int collisions_count;
  96.  
  97. hashtable(int sz)
  98. {
  99. size = sz;
  100. table = new vector<movie>[size];
  101. elements_count = 0;
  102. collisions_count=0;
  103. }
  104.  
  105. void insert(movie entry){
  106. //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
  107. int h = hash(entry.id);
  108. if(!table[h].empty())
  109. collisions_count++;
  110. table[h].push_back(entry);
  111. elements_count++;
  112.  
  113. }
  114.  
  115. //using the division method for int entries:
  116. int hash(int id){
  117. return id % size;
  118. }
  119.  
  120. movie search(int entry){
  121. int index = hash(entry);
  122. movie y;
  123. y.id = -1;
  124. for(auto x: table[index])
  125. {
  126. if(x.id == entry)
  127. {
  128. return x;
  129. }
  130. }
  131. return y; // :]
  132. }
  133.  
  134. void del(int entry){
  135. int index = hash(entry);
  136. vector <movie> :: iterator i;
  137. for (i = table[index].begin();i != table[index].end(); i++) {
  138. if (i->id == entry)
  139. break;
  140. }
  141. if (i != table[index].end()){
  142. table[index].erase(i);
  143. elements_count--;
  144. }
  145. }
  146.  
  147. void print(){
  148. //just in case
  149. for(int i=0;i<size;i++)
  150. {
  151. for(auto d: table[i])
  152. cout<<d.title<<" ";
  153. cout<<endl;
  154.  
  155. }
  156. }
  157.  
  158. void searchmovie(string title){
  159. cout << title << endl; //TBD, just for testing for now
  160. }
  161.  
  162. void searchuser(string userid){
  163. cout << userid << endl; //TBD, just for testing for now
  164. }
  165.  
  166. void searchgenre(int N, string genre){
  167. cout << N << " " << genre << endl; //TBD, just for testing for now
  168. }
  169.  
  170. void searchtag(vector<string> taglist){
  171. for (auto i: taglist)
  172. cout << i; //TBD, just for testing for now
  173. cout << endl;
  174. }
  175.  
  176. };
  177.  
  178. class user{
  179. int coco;
  180. };
  181.  
  182. class tag{
  183. public:
  184. user* author;
  185. movie* film;
  186. string text;
  187. };
  188.  
  189. class hashtablet{
  190. public:
  191. vector<tag> *table;
  192. int size; //ideally choose prime number that is at least 120% the number of entries
  193. int elements_count;
  194. int collisions_count;
  195. hashtablet(int sz)
  196. {
  197. size = sz;
  198. table = new vector<tag>[size];
  199. elements_count = 0;
  200. collisions_count=0;
  201. }
  202.  
  203. void insert(tag entry){
  204. //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
  205. int h = hash(entry.text);
  206. if(!table[h].empty())
  207. collisions_count++;
  208. table[h].push_back(entry);
  209. elements_count++;
  210.  
  211. }
  212.  
  213. int hash(string s){
  214. int cont=0;
  215. int hax=0;
  216. int ind =0;
  217. int p=31; //arbitrary value that fits very well when equal to 31.
  218. for(int i=0;i<s.size();i++)
  219. {
  220. hax=int(hax+ int(s[i])*pow(p,cont))%size;
  221. cont++;
  222. };
  223. return hax;
  224.  
  225. }
  226.  
  227. tag search(string entry){
  228. int index = hash(entry);
  229. tag y;
  230. y.text = nullptr;
  231. for(auto x: table[index])
  232. {
  233. if(x.text == entry)
  234. {
  235. return x;
  236. }
  237. }
  238. return y; // :]
  239. }
  240.  
  241. };
  242.  
  243. class genre{
  244. public:
  245. string text;
  246. vector<movie*> films;
  247. };
  248.  
  249. class hashtableg{
  250. public:
  251. vector<genre> *table;
  252. int size; //ideally choose prime number that is at least 120% the number of entries
  253. int elements_count;
  254. int collisions_count;
  255.  
  256. hashtableg(int sz)
  257. {
  258. size = sz;
  259. table = new vector<genre>[size];
  260. elements_count = 0;
  261. collisions_count=0;
  262. }
  263.  
  264. void insert(genre entry){
  265. //consists in generating the hash function of the correspondent movie and doing the push_back in table[hash]
  266. int h = hash(entry.text);
  267. if(!table[h].empty())
  268. collisions_count++;
  269. table[h].push_back(entry);
  270. elements_count++;
  271.  
  272. }
  273.  
  274. int hash(string s){
  275. int cont=0;
  276. int hax=0;
  277. int ind =0;
  278. int p=31; //arbitrary value that fits very well when equal to 31.
  279. for(int i=0;i<s.size();i++)
  280. {
  281. hax=int(hax+ int(s[i])*pow(p,cont))%size;
  282. cont++;
  283. };
  284. return hax;
  285.  
  286. }
  287.  
  288. genre search(string entry){
  289. int index = hash(entry);
  290. genre y;
  291. y.text = nullptr;
  292. for(auto x: table[index])
  293. {
  294. if(x.text == entry)
  295. {
  296. return x;
  297. }
  298. }
  299. return y; // :]
  300. }
  301.  
  302. };
  303.  
  304. void getmovies(string filename, hashtable* h, hashtableg* g, Trie* t){
  305. int id;
  306. string title;
  307. string genrename;
  308. vector<string> genres;
  309.  
  310. movie entry;
  311. genre genreitem;
  312. ifstream fin;
  313. string line; //to read each line
  314. fin.open(filename);
  315.  
  316. string skip; //used to receive unwanted data
  317. string aux;
  318.  
  319. getline(fin,skip);
  320. while(fin >> id){ //first data, receives id, and stop if that's
  321. getline(fin,skip,'"'); //skips past first "
  322. getline(fin,title,'"'); // reads title
  323. getline(fin,skip,'"'); //skips past "
  324. getline(fin,aux,'"'); //gets whole genres string
  325. stringstream temporary; //ss to parse genres string
  326. temporary << aux;
  327. while(getline(temporary,aux,'|')){ //parsing genres between |s
  328. genres.push_back(aux);
  329. genreitem = hashtableg->search(aux);
  330. if(genreitem.text == nullptr)
  331.  
  332.  
  333. }
  334. getline(fin,skip,'\n'); //you know the drill
  335. entry.init(id,title,genres); //fills movie object
  336. h->insert(entry); //inserts it in hashtable
  337. t->insert(title,id);//insert read data in trie
  338. genres.clear();
  339. }
  340. fin.close();
  341. }
  342.  
  343. void attrating(hashtable* hmovies, float newrating, int movieid){//function called every time a movie rating is readed to assign the value of it's rating.
  344. int index, cont=0, aux=0, c;
  345. index = hmovies->hash(movieid);
  346. for(auto x: hmovies->table[index])
  347. {
  348. if(x.id == movieid)
  349. {
  350. hmovies->table[index][cont].count++;
  351. c= hmovies->table[index][cont].count;
  352. aux = hmovies->table[index][cont].rating;
  353. hmovies->table[index][cont].rating= (aux*(c-1)+newrating)/c; //rating = ((rating*(count-1)) + newuserrating) /count;
  354.  
  355. }
  356. cont++;
  357. }
  358.  
  359.  
  360. }
  361.  
  362.  
  363. void getrating(string filename, hashtable* hratings, hashtable* hmovies){
  364. int userid, movieid;
  365. float rating;
  366. string aux;
  367. char ignore;
  368. ifstream fin;
  369. fin.open(filename);
  370. getline(fin, aux);
  371. while(fin >> userid){
  372. fin>>ignore;//ignore the commas
  373. fin >> movieid;
  374. fin>>ignore;
  375.  
  376. fin>>rating;
  377. attrating(hmovies, rating, movieid);
  378.  
  379. getline(fin, aux);
  380. }
  381.  
  382. fin.close();
  383.  
  384. }
  385.  
  386. void prefixsearch(Node* root, hashtable h){
  387. if(root->isLeaf){ //stop condition, prints stuff on screen
  388. movie movieinfo = h.search(root->id);
  389.  
  390. cout << root->id << " : " << movieinfo.title << " : ";
  391. int count = 0;
  392. for(auto x: movieinfo.genres){
  393. if(count)
  394. cout << "|";
  395. cout << x;
  396. count++;
  397. }
  398. cout << " : " << movieinfo.rating << " : " << movieinfo.count << endl;
  399. }
  400. else{ //else tries on children (just like ww2 german experiments)
  401. for(int i = 0; i < alphabet_size; i++)
  402. if(root->children[i])
  403. prefixsearch(root->children[i], h);
  404. }
  405. }
  406.  
  407.  
  408. int main(void){
  409. hashtable h(5501);
  410. hashtable h1(27281);
  411. Trie t;
  412. getmovies("movie.csv",&h,&t);
  413. getrating("minirating.csv",&h1, &h);
  414. cout << "\nDataset MovieLens 20M - Use 'help' to show commands.\n";
  415. cout << "We do not store your data\n";
  416. int quit = 0;
  417. string input;
  418. while(!quit){
  419. cout << ">";
  420. getline(cin,input);
  421. istringstream iss(input);
  422. iss >> input;
  423. if (input == "help"){
  424. cout << "movie <title or prefix> - Movies by title or prefix.\n";
  425. cout << "user <userID> - Reviews made by user from given userID.\n";
  426. cout << "top<N> '<genre>' - Top N movies by rating in given genre (only movies with at least 1000 reviews are shown).\n";
  427. cout << "tags <list of tags> - Movies having the given tags (write tags inside apostrophes).\n";
  428. cout << "quit - Quits program.\n";
  429. }
  430. else if (input == "movie"){
  431. string skip;
  432. getline(iss,skip,' '); //skip space
  433. getline(iss,input); //gets full movie name written
  434. Node *query = t.search(input); //searches for stuff written
  435. if(query != nullptr){ //if it found something
  436. cout << "movieid:title:genres:rating:count" << endl << endl;
  437. if(query->isLeaf){
  438.  
  439. movie movieinfo = h.search(query->id);
  440.  
  441. cout << query->id << " : " << movieinfo.title << " : ";
  442. int count = 0;
  443. for(auto x: movieinfo.genres){
  444. if(count)
  445. cout << "|";
  446. cout << x;
  447. count++;
  448. }
  449. cout << " : " << movieinfo.rating << " : " << movieinfo.count << endl;
  450. }
  451. else
  452. prefixsearch(query,h);
  453. }
  454. else
  455. cout << "No results were found." << endl;
  456. }
  457. else if (input == "user"){
  458. iss >> input;
  459. h.searchuser(input);
  460. }
  461. else if (input.substr(0,3) == "top"){
  462. int N = stoi(input.substr(3,-1)); //gets stuff after 'top' from string into int
  463. iss >> input;
  464. if(input[0] != '\'' && input[-1] != '\'') //checks for apostrophes
  465. cout << "Genre not recognised. Remember to write it between apostrophes.\n";
  466. else{
  467. h.searchgenre(N,input.substr(1,input.length()-2)); //searches removing them
  468. }
  469. }
  470. else if (input == "tags"){
  471. vector<string> tags;
  472. string skip;
  473. string aux;
  474. getline(iss,skip,'\'');
  475. while(getline(iss,aux,'\''))
  476. tags.push_back(aux); //separates between apostrophes and removes them
  477. if(!tags.empty())
  478. h.searchtag(tags);
  479. else
  480. cout << "Tags not recognised. Remember to write them between apostrophes.\n";
  481. }
  482. else if (input == "quit")
  483. quit = 1;
  484. }
  485. return 0;
  486. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement